encodeblock_amd64.s 563 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
10222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182141921420214212
14222142321424214252142621427214282142921430214312143221433214342143521436214372143821439214402144121442214432144421445214462144721448214492145021451214522145321454214552145621457214582145921460214612146221463214642146521466214672146821469214702147121472214732147421475214762147721478214792148021481214822148321484214852148621487214882148921490214912149221493214942149521496214972149821499215002150121502215032150421505215062150721508215092151021511215122151321514215152151621517215182151921520215212152221523215242152521526215272152821529215302153121532215332153421535215362153721538215392154021541215422154321544215452154621547215482154921550215512155221553215542155521556215572155821559215602156121562215632156421565215662156721568215692157021571215722157321574215752157621577215782157921580215812158221583215842158521586215872158821589215902159121592215932159421595215962159721598215992160021601216022160321604216052160621607216082160921610216112161221613216142161521616216172161821619216202162121622216232162421625216262162721628216292163021631216322163321634216352163621637216382163921640216412164221643216442164521646216472164821649216502165121652216532165421655216562165721658216592166021661216622166321664216652166621667216682166921670216712167221673216742167521676216772167821679216802168121682216832168421685216862168721688216892169021691216922169321694216952169621697216982169921700217012170221703217042170521706217072170821709217102171121712217132171421715217162171721718217192172021721217222172321724217252172621727217282172921730217312173221733217342173521736217372173821739217402174121742217432174421745217462174721748217492175021751217522175321754217552175621757217582175921760217612176221763217642176521766217672176821769217702177121772217732177421775217762177721778217792178021781217822178321784217852178621787217882178921790217912179221793217942179521796217972179821799218002180121802218032180421805218062180721808218092181021811218122181321814218152181621817218182181921820218212
182221823218242182521826218272182821829218302183121832218332183421835218362183721838218392184021841218422184321844218452184621847218482184921850218512185221853218542185521856218572185821859218602186121862218632186421865218662186721868218692187021871218722187321874218752187621877218782187921880218812188221883218842188521886218872188821889218902189121892218932189421895218962189721898218992190021901219022190321904219052190621907219082190921910219112191221913219142191521916219172191821919219202192121922219232192421925219262192721928219292193021931219322193321934219352193621937219382193921940219412194221943219442194521946219472194821949219502195121952219532195421955219562195721958219592196021961219622196321964219652196621967219682196921970219712197221973219742197521976219772197821979219802198121982219832198421985219862198721988219892199021991219922199321994219952199621997219982199922000220012200222003220042200522006220072200822009220102201122012220132201422015220162201722018220192202022021220222202322024220252202622027220282202922030220312203222033220342203522036220372203822039220402204122042220432204422045220462204722048220492205022051220522205322054220552205622057
  1. // Code generated by command: go run gen.go -out encodeblock_amd64.s -stubs encodeblock_amd64.go. DO NOT EDIT.
  2. // +build !appengine
  3. // +build !noasm
  4. // +build gc
  5. #include "textflag.h"
  6. // func encodeBlockAsm(dst []byte, src []byte) int
  7. // Requires: SSE2
  8. TEXT ·encodeBlockAsm(SB), $65560-56
  9. MOVQ dst_base+0(FP), AX
  10. MOVQ $0x00000200, CX
  11. LEAQ 24(SP), DX
  12. PXOR X0, X0
  13. zero_loop_encodeBlockAsm:
  14. MOVOU X0, (DX)
  15. MOVOU X0, 16(DX)
  16. MOVOU X0, 32(DX)
  17. MOVOU X0, 48(DX)
  18. MOVOU X0, 64(DX)
  19. MOVOU X0, 80(DX)
  20. MOVOU X0, 96(DX)
  21. MOVOU X0, 112(DX)
  22. ADDQ $0x80, DX
  23. DECQ CX
  24. JNZ zero_loop_encodeBlockAsm
  25. MOVL $0x00000000, 12(SP)
  26. MOVQ src_len+32(FP), CX
  27. LEAQ -5(CX), DX
  28. LEAQ -8(CX), BP
  29. MOVL BP, 8(SP)
  30. SHRQ $0x05, CX
  31. SUBL CX, DX
  32. LEAQ (AX)(DX*1), DX
  33. MOVQ DX, (SP)
  34. MOVL $0x00000001, CX
  35. MOVL CX, 16(SP)
  36. MOVQ src_base+24(FP), DX
  37. search_loop_encodeBlockAsm:
  38. MOVQ (DX)(CX*1), SI
  39. MOVL CX, BP
  40. SUBL 12(SP), BP
  41. SHRL $0x06, BP
  42. LEAL 4(CX)(BP*1), BP
  43. MOVL 8(SP), DI
  44. CMPL BP, DI
  45. JGT emit_remainder_encodeBlockAsm
  46. MOVL BP, 20(SP)
  47. MOVQ $0x0000cf1bbcdcbf9b, R8
  48. MOVQ SI, R9
  49. MOVQ SI, R10
  50. SHRQ $0x08, R10
  51. SHLQ $0x10, R9
  52. IMULQ R8, R9
  53. SHRQ $0x32, R9
  54. SHLQ $0x10, R10
  55. IMULQ R8, R10
  56. SHRQ $0x32, R10
  57. MOVL 24(SP)(R9*4), BP
  58. MOVL 24(SP)(R10*4), DI
  59. MOVL CX, 24(SP)(R9*4)
  60. LEAL 1(CX), R9
  61. MOVL R9, 24(SP)(R10*4)
  62. MOVQ SI, R9
  63. SHRQ $0x10, R9
  64. SHLQ $0x10, R9
  65. IMULQ R8, R9
  66. SHRQ $0x32, R9
  67. MOVL CX, R8
  68. SUBL 16(SP), R8
  69. MOVL 1(DX)(R8*1), R10
  70. MOVQ SI, R8
  71. SHRQ $0x08, R8
  72. CMPL R8, R10
  73. JNE no_repeat_found_encodeBlockAsm
  74. LEAL 1(CX), SI
  75. MOVL 12(SP), DI
  76. MOVL SI, BP
  77. SUBL 16(SP), BP
  78. JZ repeat_extend_back_end_encodeBlockAsm
  79. repeat_extend_back_loop_encodeBlockAsm:
  80. CMPL SI, DI
  81. JLE repeat_extend_back_end_encodeBlockAsm
  82. MOVB -1(DX)(BP*1), BL
  83. MOVB -1(DX)(SI*1), R8
  84. CMPB BL, R8
  85. JNE repeat_extend_back_end_encodeBlockAsm
  86. LEAL -1(SI), SI
  87. DECL BP
  88. JNZ repeat_extend_back_loop_encodeBlockAsm
  89. repeat_extend_back_end_encodeBlockAsm:
  90. MOVL 12(SP), BP
  91. CMPL BP, SI
  92. JEQ emit_literal_done_repeat_emit_encodeBlockAsm
  93. MOVL SI, R8
  94. MOVL SI, 12(SP)
  95. LEAQ (DX)(BP*1), R9
  96. SUBL BP, R8
  97. MOVL R8, BP
  98. SUBL $0x01, BP
  99. JC emit_literal_done_repeat_emit_encodeBlockAsm
  100. CMPL BP, $0x3c
  101. JLT one_byte_repeat_emit_encodeBlockAsm
  102. CMPL BP, $0x00000100
  103. JLT two_bytes_repeat_emit_encodeBlockAsm
  104. CMPL BP, $0x00010000
  105. JLT three_bytes_repeat_emit_encodeBlockAsm
  106. CMPL BP, $0x01000000
  107. JLT four_bytes_repeat_emit_encodeBlockAsm
  108. MOVB $0xfc, (AX)
  109. MOVL BP, 1(AX)
  110. ADDQ $0x05, AX
  111. JMP memmove_repeat_emit_encodeBlockAsm
  112. four_bytes_repeat_emit_encodeBlockAsm:
  113. MOVL BP, R10
  114. SHRL $0x10, R10
  115. MOVB $0xf8, (AX)
  116. MOVW BP, 1(AX)
  117. MOVB R10, 3(AX)
  118. ADDQ $0x04, AX
  119. JMP memmove_repeat_emit_encodeBlockAsm
  120. three_bytes_repeat_emit_encodeBlockAsm:
  121. MOVB $0xf4, (AX)
  122. MOVW BP, 1(AX)
  123. ADDQ $0x03, AX
  124. JMP memmove_repeat_emit_encodeBlockAsm
  125. two_bytes_repeat_emit_encodeBlockAsm:
  126. MOVB $0xf0, (AX)
  127. MOVB BP, 1(AX)
  128. ADDQ $0x02, AX
  129. JMP memmove_repeat_emit_encodeBlockAsm
  130. one_byte_repeat_emit_encodeBlockAsm:
  131. SHLB $0x02, BP
  132. MOVB BP, (AX)
  133. ADDQ $0x01, AX
  134. memmove_repeat_emit_encodeBlockAsm:
  135. LEAQ (AX)(R8*1), BP
  136. NOP
  137. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail:
  138. TESTQ R8, R8
  139. JEQ memmove_end_copy_repeat_emit_encodeBlockAsm
  140. CMPQ R8, $0x02
  141. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2
  142. CMPQ R8, $0x04
  143. JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3
  144. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4
  145. CMPQ R8, $0x08
  146. JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7
  147. JE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
  148. CMPQ R8, $0x10
  149. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16
  150. CMPQ R8, $0x20
  151. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
  152. CMPQ R8, $0x40
  153. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
  154. CMPQ R8, $0x80
  155. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128
  156. CMPQ R8, $0x00000100
  157. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256
  158. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048
  159. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2:
  160. MOVB (R9), R10
  161. MOVB -1(R9)(R8*1), R9
  162. MOVB R10, (AX)
  163. MOVB R9, -1(AX)(R8*1)
  164. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  165. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4:
  166. MOVL (R9), R10
  167. MOVL R10, (AX)
  168. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  169. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3:
  170. MOVW (R9), R10
  171. MOVB 2(R9), R9
  172. MOVW R10, (AX)
  173. MOVB R9, 2(AX)
  174. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  175. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7:
  176. MOVL (R9), R10
  177. MOVL -4(R9)(R8*1), R9
  178. MOVL R10, (AX)
  179. MOVL R9, -4(AX)(R8*1)
  180. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  181. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
  182. MOVQ (R9), R10
  183. MOVQ R10, (AX)
  184. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  185. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16:
  186. MOVQ (R9), R10
  187. MOVQ -8(R9)(R8*1), R9
  188. MOVQ R10, (AX)
  189. MOVQ R9, -8(AX)(R8*1)
  190. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  191. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
  192. MOVOU (R9), X0
  193. MOVOU -16(R9)(R8*1), X1
  194. MOVOU X0, (AX)
  195. MOVOU X1, -16(AX)(R8*1)
  196. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  197. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
  198. MOVOU (R9), X0
  199. MOVOU 16(R9), X1
  200. MOVOU -32(R9)(R8*1), X2
  201. MOVOU -16(R9)(R8*1), X3
  202. MOVOU X0, (AX)
  203. MOVOU X1, 16(AX)
  204. MOVOU X2, -32(AX)(R8*1)
  205. MOVOU X3, -16(AX)(R8*1)
  206. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  207. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128:
  208. MOVOU (R9), X0
  209. MOVOU 16(R9), X1
  210. MOVOU 32(R9), X2
  211. MOVOU 48(R9), X3
  212. MOVOU -64(R9)(R8*1), X12
  213. MOVOU -48(R9)(R8*1), X13
  214. MOVOU -32(R9)(R8*1), X14
  215. MOVOU -16(R9)(R8*1), X15
  216. MOVOU X0, (AX)
  217. MOVOU X1, 16(AX)
  218. MOVOU X2, 32(AX)
  219. MOVOU X3, 48(AX)
  220. MOVOU X12, -64(AX)(R8*1)
  221. MOVOU X13, -48(AX)(R8*1)
  222. MOVOU X14, -32(AX)(R8*1)
  223. MOVOU X15, -16(AX)(R8*1)
  224. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  225. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256:
  226. MOVOU (R9), X0
  227. MOVOU 16(R9), X1
  228. MOVOU 32(R9), X2
  229. MOVOU 48(R9), X3
  230. MOVOU 64(R9), X4
  231. MOVOU 80(R9), X5
  232. MOVOU 96(R9), X6
  233. MOVOU 112(R9), X7
  234. MOVOU -128(R9)(R8*1), X8
  235. MOVOU -112(R9)(R8*1), X9
  236. MOVOU -96(R9)(R8*1), X10
  237. MOVOU -80(R9)(R8*1), X11
  238. MOVOU -64(R9)(R8*1), X12
  239. MOVOU -48(R9)(R8*1), X13
  240. MOVOU -32(R9)(R8*1), X14
  241. MOVOU -16(R9)(R8*1), X15
  242. MOVOU X0, (AX)
  243. MOVOU X1, 16(AX)
  244. MOVOU X2, 32(AX)
  245. MOVOU X3, 48(AX)
  246. MOVOU X4, 64(AX)
  247. MOVOU X5, 80(AX)
  248. MOVOU X6, 96(AX)
  249. MOVOU X7, 112(AX)
  250. MOVOU X8, -128(AX)(R8*1)
  251. MOVOU X9, -112(AX)(R8*1)
  252. MOVOU X10, -96(AX)(R8*1)
  253. MOVOU X11, -80(AX)(R8*1)
  254. MOVOU X12, -64(AX)(R8*1)
  255. MOVOU X13, -48(AX)(R8*1)
  256. MOVOU X14, -32(AX)(R8*1)
  257. MOVOU X15, -16(AX)(R8*1)
  258. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  259. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048:
  260. LEAQ -256(R8), R8
  261. MOVOU (R9), X0
  262. MOVOU 16(R9), X1
  263. MOVOU 32(R9), X2
  264. MOVOU 48(R9), X3
  265. MOVOU 64(R9), X4
  266. MOVOU 80(R9), X5
  267. MOVOU 96(R9), X6
  268. MOVOU 112(R9), X7
  269. MOVOU 128(R9), X8
  270. MOVOU 144(R9), X9
  271. MOVOU 160(R9), X10
  272. MOVOU 176(R9), X11
  273. MOVOU 192(R9), X12
  274. MOVOU 208(R9), X13
  275. MOVOU 224(R9), X14
  276. MOVOU 240(R9), X15
  277. MOVOU X0, (AX)
  278. MOVOU X1, 16(AX)
  279. MOVOU X2, 32(AX)
  280. MOVOU X3, 48(AX)
  281. MOVOU X4, 64(AX)
  282. MOVOU X5, 80(AX)
  283. MOVOU X6, 96(AX)
  284. MOVOU X7, 112(AX)
  285. MOVOU X8, 128(AX)
  286. MOVOU X9, 144(AX)
  287. MOVOU X10, 160(AX)
  288. MOVOU X11, 176(AX)
  289. MOVOU X12, 192(AX)
  290. MOVOU X13, 208(AX)
  291. MOVOU X14, 224(AX)
  292. MOVOU X15, 240(AX)
  293. CMPQ R8, $0x00000100
  294. LEAQ 256(R9), R9
  295. LEAQ 256(AX), AX
  296. JGE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048
  297. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail
  298. memmove_end_copy_repeat_emit_encodeBlockAsm:
  299. MOVQ BP, AX
  300. emit_literal_done_repeat_emit_encodeBlockAsm:
  301. ADDL $0x05, CX
  302. MOVL CX, BP
  303. SUBL 16(SP), BP
  304. MOVQ src_len+32(FP), R8
  305. SUBL CX, R8
  306. LEAQ (DX)(CX*1), R9
  307. LEAQ (DX)(BP*1), BP
  308. XORL R11, R11
  309. CMPL R8, $0x08
  310. JL matchlen_single_repeat_extend
  311. matchlen_loopback_repeat_extend:
  312. MOVQ (R9)(R11*1), R10
  313. XORQ (BP)(R11*1), R10
  314. TESTQ R10, R10
  315. JZ matchlen_loop_repeat_extend
  316. BSFQ R10, R10
  317. SARQ $0x03, R10
  318. LEAL (R11)(R10*1), R11
  319. JMP repeat_extend_forward_end_encodeBlockAsm
  320. matchlen_loop_repeat_extend:
  321. LEAL -8(R8), R8
  322. LEAL 8(R11), R11
  323. CMPL R8, $0x08
  324. JGE matchlen_loopback_repeat_extend
  325. matchlen_single_repeat_extend:
  326. TESTL R8, R8
  327. JZ repeat_extend_forward_end_encodeBlockAsm
  328. matchlen_single_loopback_repeat_extend:
  329. MOVB (R9)(R11*1), R10
  330. CMPB (BP)(R11*1), R10
  331. JNE repeat_extend_forward_end_encodeBlockAsm
  332. LEAL 1(R11), R11
  333. DECL R8
  334. JNZ matchlen_single_loopback_repeat_extend
  335. repeat_extend_forward_end_encodeBlockAsm:
  336. ADDL R11, CX
  337. MOVL CX, BP
  338. SUBL SI, BP
  339. MOVL 16(SP), SI
  340. TESTL DI, DI
  341. JZ repeat_as_copy_encodeBlockAsm
  342. emit_repeat_again_match_repeat_encodeBlockAsm:
  343. MOVL BP, DI
  344. LEAL -4(BP), BP
  345. CMPL DI, $0x08
  346. JLE repeat_two_match_repeat_encodeBlockAsm
  347. CMPL DI, $0x0c
  348. JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm
  349. CMPL SI, $0x00000800
  350. JLT repeat_two_offset_match_repeat_encodeBlockAsm
  351. cant_repeat_two_offset_match_repeat_encodeBlockAsm:
  352. CMPL BP, $0x00000104
  353. JLT repeat_three_match_repeat_encodeBlockAsm
  354. CMPL BP, $0x00010100
  355. JLT repeat_four_match_repeat_encodeBlockAsm
  356. CMPL BP, $0x0100ffff
  357. JLT repeat_five_match_repeat_encodeBlockAsm
  358. LEAL -16842747(BP), BP
  359. MOVW $0x001d, (AX)
  360. MOVW $0xfffb, 2(AX)
  361. MOVB $0xff, 4(AX)
  362. ADDQ $0x05, AX
  363. JMP emit_repeat_again_match_repeat_encodeBlockAsm
  364. repeat_five_match_repeat_encodeBlockAsm:
  365. LEAL -65536(BP), BP
  366. MOVL BP, SI
  367. MOVW $0x001d, (AX)
  368. MOVW BP, 2(AX)
  369. SARL $0x10, SI
  370. MOVB SI, 4(AX)
  371. ADDQ $0x05, AX
  372. JMP repeat_end_emit_encodeBlockAsm
  373. repeat_four_match_repeat_encodeBlockAsm:
  374. LEAL -256(BP), BP
  375. MOVW $0x0019, (AX)
  376. MOVW BP, 2(AX)
  377. ADDQ $0x04, AX
  378. JMP repeat_end_emit_encodeBlockAsm
  379. repeat_three_match_repeat_encodeBlockAsm:
  380. LEAL -4(BP), BP
  381. MOVW $0x0015, (AX)
  382. MOVB BP, 2(AX)
  383. ADDQ $0x03, AX
  384. JMP repeat_end_emit_encodeBlockAsm
  385. repeat_two_match_repeat_encodeBlockAsm:
  386. SHLL $0x02, BP
  387. ORL $0x01, BP
  388. MOVW BP, (AX)
  389. ADDQ $0x02, AX
  390. JMP repeat_end_emit_encodeBlockAsm
  391. repeat_two_offset_match_repeat_encodeBlockAsm:
  392. XORQ DI, DI
  393. LEAL 1(DI)(BP*4), BP
  394. MOVB SI, 1(AX)
  395. SARL $0x08, SI
  396. SHLL $0x05, SI
  397. ORL SI, BP
  398. MOVB BP, (AX)
  399. ADDQ $0x02, AX
  400. JMP repeat_end_emit_encodeBlockAsm
  401. repeat_as_copy_encodeBlockAsm:
  402. CMPL SI, $0x00010000
  403. JL two_byte_offset_repeat_as_copy_encodeBlockAsm
  404. four_bytes_loop_back_repeat_as_copy_encodeBlockAsm:
  405. CMPL BP, $0x40
  406. JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm
  407. MOVB $0xff, (AX)
  408. MOVL SI, 1(AX)
  409. LEAL -64(BP), BP
  410. ADDQ $0x05, AX
  411. CMPL BP, $0x04
  412. JL four_bytes_remain_repeat_as_copy_encodeBlockAsm
  413. emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
  414. MOVL BP, DI
  415. LEAL -4(BP), BP
  416. CMPL DI, $0x08
  417. JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
  418. CMPL DI, $0x0c
  419. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
  420. CMPL SI, $0x00000800
  421. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
  422. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
  423. CMPL BP, $0x00000104
  424. JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
  425. CMPL BP, $0x00010100
  426. JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
  427. CMPL BP, $0x0100ffff
  428. JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
  429. LEAL -16842747(BP), BP
  430. MOVW $0x001d, (AX)
  431. MOVW $0xfffb, 2(AX)
  432. MOVB $0xff, 4(AX)
  433. ADDQ $0x05, AX
  434. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
  435. repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
  436. LEAL -65536(BP), BP
  437. MOVL BP, SI
  438. MOVW $0x001d, (AX)
  439. MOVW BP, 2(AX)
  440. SARL $0x10, SI
  441. MOVB SI, 4(AX)
  442. ADDQ $0x05, AX
  443. JMP repeat_end_emit_encodeBlockAsm
  444. repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
  445. LEAL -256(BP), BP
  446. MOVW $0x0019, (AX)
  447. MOVW BP, 2(AX)
  448. ADDQ $0x04, AX
  449. JMP repeat_end_emit_encodeBlockAsm
  450. repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
  451. LEAL -4(BP), BP
  452. MOVW $0x0015, (AX)
  453. MOVB BP, 2(AX)
  454. ADDQ $0x03, AX
  455. JMP repeat_end_emit_encodeBlockAsm
  456. repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
  457. SHLL $0x02, BP
  458. ORL $0x01, BP
  459. MOVW BP, (AX)
  460. ADDQ $0x02, AX
  461. JMP repeat_end_emit_encodeBlockAsm
  462. repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
  463. XORQ DI, DI
  464. LEAL 1(DI)(BP*4), BP
  465. MOVB SI, 1(AX)
  466. SARL $0x08, SI
  467. SHLL $0x05, SI
  468. ORL SI, BP
  469. MOVB BP, (AX)
  470. ADDQ $0x02, AX
  471. JMP repeat_end_emit_encodeBlockAsm
  472. JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm
  473. four_bytes_remain_repeat_as_copy_encodeBlockAsm:
  474. TESTL BP, BP
  475. JZ repeat_end_emit_encodeBlockAsm
  476. MOVB $0x03, BL
  477. LEAL -4(BX)(BP*4), BP
  478. MOVB BP, (AX)
  479. MOVL SI, 1(AX)
  480. ADDQ $0x05, AX
  481. JMP repeat_end_emit_encodeBlockAsm
  482. two_byte_offset_repeat_as_copy_encodeBlockAsm:
  483. CMPL BP, $0x40
  484. JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
  485. MOVB $0xee, (AX)
  486. MOVW SI, 1(AX)
  487. LEAL -60(BP), BP
  488. ADDQ $0x03, AX
  489. emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  490. MOVL BP, DI
  491. LEAL -4(BP), BP
  492. CMPL DI, $0x08
  493. JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
  494. CMPL DI, $0x0c
  495. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
  496. CMPL SI, $0x00000800
  497. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
  498. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  499. CMPL BP, $0x00000104
  500. JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
  501. CMPL BP, $0x00010100
  502. JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
  503. CMPL BP, $0x0100ffff
  504. JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
  505. LEAL -16842747(BP), BP
  506. MOVW $0x001d, (AX)
  507. MOVW $0xfffb, 2(AX)
  508. MOVB $0xff, 4(AX)
  509. ADDQ $0x05, AX
  510. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
  511. repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  512. LEAL -65536(BP), BP
  513. MOVL BP, SI
  514. MOVW $0x001d, (AX)
  515. MOVW BP, 2(AX)
  516. SARL $0x10, SI
  517. MOVB SI, 4(AX)
  518. ADDQ $0x05, AX
  519. JMP repeat_end_emit_encodeBlockAsm
  520. repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  521. LEAL -256(BP), BP
  522. MOVW $0x0019, (AX)
  523. MOVW BP, 2(AX)
  524. ADDQ $0x04, AX
  525. JMP repeat_end_emit_encodeBlockAsm
  526. repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  527. LEAL -4(BP), BP
  528. MOVW $0x0015, (AX)
  529. MOVB BP, 2(AX)
  530. ADDQ $0x03, AX
  531. JMP repeat_end_emit_encodeBlockAsm
  532. repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  533. SHLL $0x02, BP
  534. ORL $0x01, BP
  535. MOVW BP, (AX)
  536. ADDQ $0x02, AX
  537. JMP repeat_end_emit_encodeBlockAsm
  538. repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  539. XORQ DI, DI
  540. LEAL 1(DI)(BP*4), BP
  541. MOVB SI, 1(AX)
  542. SARL $0x08, SI
  543. SHLL $0x05, SI
  544. ORL SI, BP
  545. MOVB BP, (AX)
  546. ADDQ $0x02, AX
  547. JMP repeat_end_emit_encodeBlockAsm
  548. JMP two_byte_offset_repeat_as_copy_encodeBlockAsm
  549. two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
  550. CMPL BP, $0x0c
  551. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
  552. CMPL SI, $0x00000800
  553. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
  554. MOVB $0x01, BL
  555. LEAL -16(BX)(BP*4), BP
  556. MOVB SI, 1(AX)
  557. SHRL $0x08, SI
  558. SHLL $0x05, SI
  559. ORL SI, BP
  560. MOVB BP, (AX)
  561. ADDQ $0x02, AX
  562. JMP repeat_end_emit_encodeBlockAsm
  563. emit_copy_three_repeat_as_copy_encodeBlockAsm:
  564. MOVB $0x02, BL
  565. LEAL -4(BX)(BP*4), BP
  566. MOVB BP, (AX)
  567. MOVW SI, 1(AX)
  568. ADDQ $0x03, AX
  569. repeat_end_emit_encodeBlockAsm:
  570. MOVL CX, 12(SP)
  571. CMPL CX, 8(SP)
  572. JGE emit_remainder_encodeBlockAsm
  573. JMP search_loop_encodeBlockAsm
  574. no_repeat_found_encodeBlockAsm:
  575. CMPL (DX)(BP*1), SI
  576. JEQ candidate_match_encodeBlockAsm
  577. SHRQ $0x08, SI
  578. MOVL 24(SP)(R9*4), BP
  579. LEAL 2(CX), R8
  580. CMPL (DX)(DI*1), SI
  581. JEQ candidate2_match_encodeBlockAsm
  582. MOVL R8, 24(SP)(R9*4)
  583. SHRQ $0x08, SI
  584. CMPL (DX)(BP*1), SI
  585. JEQ candidate3_match_encodeBlockAsm
  586. MOVL 20(SP), CX
  587. JMP search_loop_encodeBlockAsm
  588. candidate3_match_encodeBlockAsm:
  589. ADDL $0x02, CX
  590. JMP candidate_match_encodeBlockAsm
  591. candidate2_match_encodeBlockAsm:
  592. MOVL R8, 24(SP)(R9*4)
  593. INCL CX
  594. MOVL DI, BP
  595. candidate_match_encodeBlockAsm:
  596. MOVL 12(SP), SI
  597. TESTL BP, BP
  598. JZ match_extend_back_end_encodeBlockAsm
  599. match_extend_back_loop_encodeBlockAsm:
  600. CMPL CX, SI
  601. JLE match_extend_back_end_encodeBlockAsm
  602. MOVB -1(DX)(BP*1), BL
  603. MOVB -1(DX)(CX*1), DI
  604. CMPB BL, DI
  605. JNE match_extend_back_end_encodeBlockAsm
  606. LEAL -1(CX), CX
  607. DECL BP
  608. JZ match_extend_back_end_encodeBlockAsm
  609. JMP match_extend_back_loop_encodeBlockAsm
  610. match_extend_back_end_encodeBlockAsm:
  611. MOVL CX, SI
  612. SUBL 12(SP), SI
  613. LEAQ 4(AX)(SI*1), SI
  614. CMPQ SI, (SP)
  615. JL match_dst_size_check_encodeBlockAsm
  616. MOVQ $0x00000000, ret+48(FP)
  617. RET
  618. match_dst_size_check_encodeBlockAsm:
  619. MOVL CX, SI
  620. MOVL 12(SP), DI
  621. CMPL DI, SI
  622. JEQ emit_literal_done_match_emit_encodeBlockAsm
  623. MOVL SI, R8
  624. MOVL SI, 12(SP)
  625. LEAQ (DX)(DI*1), SI
  626. SUBL DI, R8
  627. MOVL R8, DI
  628. SUBL $0x01, DI
  629. JC emit_literal_done_match_emit_encodeBlockAsm
  630. CMPL DI, $0x3c
  631. JLT one_byte_match_emit_encodeBlockAsm
  632. CMPL DI, $0x00000100
  633. JLT two_bytes_match_emit_encodeBlockAsm
  634. CMPL DI, $0x00010000
  635. JLT three_bytes_match_emit_encodeBlockAsm
  636. CMPL DI, $0x01000000
  637. JLT four_bytes_match_emit_encodeBlockAsm
  638. MOVB $0xfc, (AX)
  639. MOVL DI, 1(AX)
  640. ADDQ $0x05, AX
  641. JMP memmove_match_emit_encodeBlockAsm
  642. four_bytes_match_emit_encodeBlockAsm:
  643. MOVL DI, R9
  644. SHRL $0x10, R9
  645. MOVB $0xf8, (AX)
  646. MOVW DI, 1(AX)
  647. MOVB R9, 3(AX)
  648. ADDQ $0x04, AX
  649. JMP memmove_match_emit_encodeBlockAsm
  650. three_bytes_match_emit_encodeBlockAsm:
  651. MOVB $0xf4, (AX)
  652. MOVW DI, 1(AX)
  653. ADDQ $0x03, AX
  654. JMP memmove_match_emit_encodeBlockAsm
  655. two_bytes_match_emit_encodeBlockAsm:
  656. MOVB $0xf0, (AX)
  657. MOVB DI, 1(AX)
  658. ADDQ $0x02, AX
  659. JMP memmove_match_emit_encodeBlockAsm
  660. one_byte_match_emit_encodeBlockAsm:
  661. SHLB $0x02, DI
  662. MOVB DI, (AX)
  663. ADDQ $0x01, AX
  664. memmove_match_emit_encodeBlockAsm:
  665. LEAQ (AX)(R8*1), DI
  666. NOP
  667. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail:
  668. TESTQ R8, R8
  669. JEQ memmove_end_copy_match_emit_encodeBlockAsm
  670. CMPQ R8, $0x02
  671. JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2
  672. CMPQ R8, $0x04
  673. JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3
  674. JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4
  675. CMPQ R8, $0x08
  676. JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7
  677. JE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
  678. CMPQ R8, $0x10
  679. JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16
  680. CMPQ R8, $0x20
  681. JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
  682. CMPQ R8, $0x40
  683. JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
  684. CMPQ R8, $0x80
  685. JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128
  686. CMPQ R8, $0x00000100
  687. JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256
  688. JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048
  689. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2:
  690. MOVB (SI), R9
  691. MOVB -1(SI)(R8*1), SI
  692. MOVB R9, (AX)
  693. MOVB SI, -1(AX)(R8*1)
  694. JMP memmove_end_copy_match_emit_encodeBlockAsm
  695. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4:
  696. MOVL (SI), R9
  697. MOVL R9, (AX)
  698. JMP memmove_end_copy_match_emit_encodeBlockAsm
  699. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3:
  700. MOVW (SI), R9
  701. MOVB 2(SI), SI
  702. MOVW R9, (AX)
  703. MOVB SI, 2(AX)
  704. JMP memmove_end_copy_match_emit_encodeBlockAsm
  705. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7:
  706. MOVL (SI), R9
  707. MOVL -4(SI)(R8*1), SI
  708. MOVL R9, (AX)
  709. MOVL SI, -4(AX)(R8*1)
  710. JMP memmove_end_copy_match_emit_encodeBlockAsm
  711. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
  712. MOVQ (SI), R9
  713. MOVQ R9, (AX)
  714. JMP memmove_end_copy_match_emit_encodeBlockAsm
  715. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16:
  716. MOVQ (SI), R9
  717. MOVQ -8(SI)(R8*1), SI
  718. MOVQ R9, (AX)
  719. MOVQ SI, -8(AX)(R8*1)
  720. JMP memmove_end_copy_match_emit_encodeBlockAsm
  721. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
  722. MOVOU (SI), X0
  723. MOVOU -16(SI)(R8*1), X1
  724. MOVOU X0, (AX)
  725. MOVOU X1, -16(AX)(R8*1)
  726. JMP memmove_end_copy_match_emit_encodeBlockAsm
  727. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
  728. MOVOU (SI), X0
  729. MOVOU 16(SI), X1
  730. MOVOU -32(SI)(R8*1), X2
  731. MOVOU -16(SI)(R8*1), X3
  732. MOVOU X0, (AX)
  733. MOVOU X1, 16(AX)
  734. MOVOU X2, -32(AX)(R8*1)
  735. MOVOU X3, -16(AX)(R8*1)
  736. JMP memmove_end_copy_match_emit_encodeBlockAsm
  737. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128:
  738. MOVOU (SI), X0
  739. MOVOU 16(SI), X1
  740. MOVOU 32(SI), X2
  741. MOVOU 48(SI), X3
  742. MOVOU -64(SI)(R8*1), X12
  743. MOVOU -48(SI)(R8*1), X13
  744. MOVOU -32(SI)(R8*1), X14
  745. MOVOU -16(SI)(R8*1), X15
  746. MOVOU X0, (AX)
  747. MOVOU X1, 16(AX)
  748. MOVOU X2, 32(AX)
  749. MOVOU X3, 48(AX)
  750. MOVOU X12, -64(AX)(R8*1)
  751. MOVOU X13, -48(AX)(R8*1)
  752. MOVOU X14, -32(AX)(R8*1)
  753. MOVOU X15, -16(AX)(R8*1)
  754. JMP memmove_end_copy_match_emit_encodeBlockAsm
  755. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256:
  756. MOVOU (SI), X0
  757. MOVOU 16(SI), X1
  758. MOVOU 32(SI), X2
  759. MOVOU 48(SI), X3
  760. MOVOU 64(SI), X4
  761. MOVOU 80(SI), X5
  762. MOVOU 96(SI), X6
  763. MOVOU 112(SI), X7
  764. MOVOU -128(SI)(R8*1), X8
  765. MOVOU -112(SI)(R8*1), X9
  766. MOVOU -96(SI)(R8*1), X10
  767. MOVOU -80(SI)(R8*1), X11
  768. MOVOU -64(SI)(R8*1), X12
  769. MOVOU -48(SI)(R8*1), X13
  770. MOVOU -32(SI)(R8*1), X14
  771. MOVOU -16(SI)(R8*1), X15
  772. MOVOU X0, (AX)
  773. MOVOU X1, 16(AX)
  774. MOVOU X2, 32(AX)
  775. MOVOU X3, 48(AX)
  776. MOVOU X4, 64(AX)
  777. MOVOU X5, 80(AX)
  778. MOVOU X6, 96(AX)
  779. MOVOU X7, 112(AX)
  780. MOVOU X8, -128(AX)(R8*1)
  781. MOVOU X9, -112(AX)(R8*1)
  782. MOVOU X10, -96(AX)(R8*1)
  783. MOVOU X11, -80(AX)(R8*1)
  784. MOVOU X12, -64(AX)(R8*1)
  785. MOVOU X13, -48(AX)(R8*1)
  786. MOVOU X14, -32(AX)(R8*1)
  787. MOVOU X15, -16(AX)(R8*1)
  788. JMP memmove_end_copy_match_emit_encodeBlockAsm
  789. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048:
  790. LEAQ -256(R8), R8
  791. MOVOU (SI), X0
  792. MOVOU 16(SI), X1
  793. MOVOU 32(SI), X2
  794. MOVOU 48(SI), X3
  795. MOVOU 64(SI), X4
  796. MOVOU 80(SI), X5
  797. MOVOU 96(SI), X6
  798. MOVOU 112(SI), X7
  799. MOVOU 128(SI), X8
  800. MOVOU 144(SI), X9
  801. MOVOU 160(SI), X10
  802. MOVOU 176(SI), X11
  803. MOVOU 192(SI), X12
  804. MOVOU 208(SI), X13
  805. MOVOU 224(SI), X14
  806. MOVOU 240(SI), X15
  807. MOVOU X0, (AX)
  808. MOVOU X1, 16(AX)
  809. MOVOU X2, 32(AX)
  810. MOVOU X3, 48(AX)
  811. MOVOU X4, 64(AX)
  812. MOVOU X5, 80(AX)
  813. MOVOU X6, 96(AX)
  814. MOVOU X7, 112(AX)
  815. MOVOU X8, 128(AX)
  816. MOVOU X9, 144(AX)
  817. MOVOU X10, 160(AX)
  818. MOVOU X11, 176(AX)
  819. MOVOU X12, 192(AX)
  820. MOVOU X13, 208(AX)
  821. MOVOU X14, 224(AX)
  822. MOVOU X15, 240(AX)
  823. CMPQ R8, $0x00000100
  824. LEAQ 256(SI), SI
  825. LEAQ 256(AX), AX
  826. JGE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048
  827. JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail
  828. memmove_end_copy_match_emit_encodeBlockAsm:
  829. MOVQ DI, AX
  830. emit_literal_done_match_emit_encodeBlockAsm:
  831. match_nolit_loop_encodeBlockAsm:
  832. MOVL CX, SI
  833. SUBL BP, SI
  834. MOVL SI, 16(SP)
  835. ADDL $0x04, CX
  836. ADDL $0x04, BP
  837. MOVQ src_len+32(FP), SI
  838. SUBL CX, SI
  839. LEAQ (DX)(CX*1), DI
  840. LEAQ (DX)(BP*1), BP
  841. XORL R9, R9
  842. CMPL SI, $0x08
  843. JL matchlen_single_match_nolit_encodeBlockAsm
  844. matchlen_loopback_match_nolit_encodeBlockAsm:
  845. MOVQ (DI)(R9*1), R8
  846. XORQ (BP)(R9*1), R8
  847. TESTQ R8, R8
  848. JZ matchlen_loop_match_nolit_encodeBlockAsm
  849. BSFQ R8, R8
  850. SARQ $0x03, R8
  851. LEAL (R9)(R8*1), R9
  852. JMP match_nolit_end_encodeBlockAsm
  853. matchlen_loop_match_nolit_encodeBlockAsm:
  854. LEAL -8(SI), SI
  855. LEAL 8(R9), R9
  856. CMPL SI, $0x08
  857. JGE matchlen_loopback_match_nolit_encodeBlockAsm
  858. matchlen_single_match_nolit_encodeBlockAsm:
  859. TESTL SI, SI
  860. JZ match_nolit_end_encodeBlockAsm
  861. matchlen_single_loopback_match_nolit_encodeBlockAsm:
  862. MOVB (DI)(R9*1), R8
  863. CMPB (BP)(R9*1), R8
  864. JNE match_nolit_end_encodeBlockAsm
  865. LEAL 1(R9), R9
  866. DECL SI
  867. JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm
  868. match_nolit_end_encodeBlockAsm:
  869. ADDL R9, CX
  870. MOVL 16(SP), BP
  871. ADDL $0x04, R9
  872. CMPL BP, $0x00010000
  873. JL two_byte_offset_match_nolit_encodeBlockAsm
  874. four_bytes_loop_back_match_nolit_encodeBlockAsm:
  875. CMPL R9, $0x40
  876. JLE four_bytes_remain_match_nolit_encodeBlockAsm
  877. MOVB $0xff, (AX)
  878. MOVL BP, 1(AX)
  879. LEAL -64(R9), R9
  880. ADDQ $0x05, AX
  881. CMPL R9, $0x04
  882. JL four_bytes_remain_match_nolit_encodeBlockAsm
  883. emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
  884. MOVL R9, SI
  885. LEAL -4(R9), R9
  886. CMPL SI, $0x08
  887. JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
  888. CMPL SI, $0x0c
  889. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
  890. CMPL BP, $0x00000800
  891. JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
  892. cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
  893. CMPL R9, $0x00000104
  894. JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
  895. CMPL R9, $0x00010100
  896. JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
  897. CMPL R9, $0x0100ffff
  898. JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
  899. LEAL -16842747(R9), R9
  900. MOVW $0x001d, (AX)
  901. MOVW $0xfffb, 2(AX)
  902. MOVB $0xff, 4(AX)
  903. ADDQ $0x05, AX
  904. JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
  905. repeat_five_match_nolit_encodeBlockAsm_emit_copy:
  906. LEAL -65536(R9), R9
  907. MOVL R9, BP
  908. MOVW $0x001d, (AX)
  909. MOVW R9, 2(AX)
  910. SARL $0x10, BP
  911. MOVB BP, 4(AX)
  912. ADDQ $0x05, AX
  913. JMP match_nolit_emitcopy_end_encodeBlockAsm
  914. repeat_four_match_nolit_encodeBlockAsm_emit_copy:
  915. LEAL -256(R9), R9
  916. MOVW $0x0019, (AX)
  917. MOVW R9, 2(AX)
  918. ADDQ $0x04, AX
  919. JMP match_nolit_emitcopy_end_encodeBlockAsm
  920. repeat_three_match_nolit_encodeBlockAsm_emit_copy:
  921. LEAL -4(R9), R9
  922. MOVW $0x0015, (AX)
  923. MOVB R9, 2(AX)
  924. ADDQ $0x03, AX
  925. JMP match_nolit_emitcopy_end_encodeBlockAsm
  926. repeat_two_match_nolit_encodeBlockAsm_emit_copy:
  927. SHLL $0x02, R9
  928. ORL $0x01, R9
  929. MOVW R9, (AX)
  930. ADDQ $0x02, AX
  931. JMP match_nolit_emitcopy_end_encodeBlockAsm
  932. repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
  933. XORQ SI, SI
  934. LEAL 1(SI)(R9*4), R9
  935. MOVB BP, 1(AX)
  936. SARL $0x08, BP
  937. SHLL $0x05, BP
  938. ORL BP, R9
  939. MOVB R9, (AX)
  940. ADDQ $0x02, AX
  941. JMP match_nolit_emitcopy_end_encodeBlockAsm
  942. JMP four_bytes_loop_back_match_nolit_encodeBlockAsm
  943. four_bytes_remain_match_nolit_encodeBlockAsm:
  944. TESTL R9, R9
  945. JZ match_nolit_emitcopy_end_encodeBlockAsm
  946. MOVB $0x03, BL
  947. LEAL -4(BX)(R9*4), R9
  948. MOVB R9, (AX)
  949. MOVL BP, 1(AX)
  950. ADDQ $0x05, AX
  951. JMP match_nolit_emitcopy_end_encodeBlockAsm
  952. two_byte_offset_match_nolit_encodeBlockAsm:
  953. CMPL R9, $0x40
  954. JLE two_byte_offset_short_match_nolit_encodeBlockAsm
  955. MOVB $0xee, (AX)
  956. MOVW BP, 1(AX)
  957. LEAL -60(R9), R9
  958. ADDQ $0x03, AX
  959. emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
  960. MOVL R9, SI
  961. LEAL -4(R9), R9
  962. CMPL SI, $0x08
  963. JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
  964. CMPL SI, $0x0c
  965. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
  966. CMPL BP, $0x00000800
  967. JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
  968. cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
  969. CMPL R9, $0x00000104
  970. JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
  971. CMPL R9, $0x00010100
  972. JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
  973. CMPL R9, $0x0100ffff
  974. JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
  975. LEAL -16842747(R9), R9
  976. MOVW $0x001d, (AX)
  977. MOVW $0xfffb, 2(AX)
  978. MOVB $0xff, 4(AX)
  979. ADDQ $0x05, AX
  980. JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
  981. repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
  982. LEAL -65536(R9), R9
  983. MOVL R9, BP
  984. MOVW $0x001d, (AX)
  985. MOVW R9, 2(AX)
  986. SARL $0x10, BP
  987. MOVB BP, 4(AX)
  988. ADDQ $0x05, AX
  989. JMP match_nolit_emitcopy_end_encodeBlockAsm
  990. repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
  991. LEAL -256(R9), R9
  992. MOVW $0x0019, (AX)
  993. MOVW R9, 2(AX)
  994. ADDQ $0x04, AX
  995. JMP match_nolit_emitcopy_end_encodeBlockAsm
  996. repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
  997. LEAL -4(R9), R9
  998. MOVW $0x0015, (AX)
  999. MOVB R9, 2(AX)
  1000. ADDQ $0x03, AX
  1001. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1002. repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
  1003. SHLL $0x02, R9
  1004. ORL $0x01, R9
  1005. MOVW R9, (AX)
  1006. ADDQ $0x02, AX
  1007. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1008. repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
  1009. XORQ SI, SI
  1010. LEAL 1(SI)(R9*4), R9
  1011. MOVB BP, 1(AX)
  1012. SARL $0x08, BP
  1013. SHLL $0x05, BP
  1014. ORL BP, R9
  1015. MOVB R9, (AX)
  1016. ADDQ $0x02, AX
  1017. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1018. JMP two_byte_offset_match_nolit_encodeBlockAsm
  1019. two_byte_offset_short_match_nolit_encodeBlockAsm:
  1020. CMPL R9, $0x0c
  1021. JGE emit_copy_three_match_nolit_encodeBlockAsm
  1022. CMPL BP, $0x00000800
  1023. JGE emit_copy_three_match_nolit_encodeBlockAsm
  1024. MOVB $0x01, BL
  1025. LEAL -16(BX)(R9*4), R9
  1026. MOVB BP, 1(AX)
  1027. SHRL $0x08, BP
  1028. SHLL $0x05, BP
  1029. ORL BP, R9
  1030. MOVB R9, (AX)
  1031. ADDQ $0x02, AX
  1032. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1033. emit_copy_three_match_nolit_encodeBlockAsm:
  1034. MOVB $0x02, BL
  1035. LEAL -4(BX)(R9*4), R9
  1036. MOVB R9, (AX)
  1037. MOVW BP, 1(AX)
  1038. ADDQ $0x03, AX
  1039. match_nolit_emitcopy_end_encodeBlockAsm:
  1040. MOVL CX, 12(SP)
  1041. CMPL CX, 8(SP)
  1042. JGE emit_remainder_encodeBlockAsm
  1043. CMPQ AX, (SP)
  1044. JL match_nolit_dst_ok_encodeBlockAsm
  1045. MOVQ $0x00000000, ret+48(FP)
  1046. RET
  1047. match_nolit_dst_ok_encodeBlockAsm:
  1048. MOVQ -2(DX)(CX*1), SI
  1049. MOVQ $0x0000cf1bbcdcbf9b, BP
  1050. MOVQ SI, DI
  1051. SHRQ $0x10, SI
  1052. MOVQ SI, R8
  1053. SHLQ $0x10, DI
  1054. IMULQ BP, DI
  1055. SHRQ $0x32, DI
  1056. SHLQ $0x10, R8
  1057. IMULQ BP, R8
  1058. SHRQ $0x32, R8
  1059. LEAL -2(CX), R9
  1060. MOVL 24(SP)(R8*4), BP
  1061. MOVL R9, 24(SP)(DI*4)
  1062. MOVL CX, 24(SP)(R8*4)
  1063. CMPL (DX)(BP*1), SI
  1064. JEQ match_nolit_loop_encodeBlockAsm
  1065. INCL CX
  1066. JMP search_loop_encodeBlockAsm
  1067. emit_remainder_encodeBlockAsm:
  1068. MOVQ src_len+32(FP), CX
  1069. SUBL 12(SP), CX
  1070. LEAQ 4(AX)(CX*1), CX
  1071. CMPQ CX, (SP)
  1072. JL emit_remainder_ok_encodeBlockAsm
  1073. MOVQ $0x00000000, ret+48(FP)
  1074. RET
  1075. emit_remainder_ok_encodeBlockAsm:
  1076. MOVQ src_len+32(FP), CX
  1077. MOVL 12(SP), BX
  1078. CMPL BX, CX
  1079. JEQ emit_literal_done_emit_remainder_encodeBlockAsm
  1080. MOVL CX, BP
  1081. MOVL CX, 12(SP)
  1082. LEAQ (DX)(BX*1), CX
  1083. SUBL BX, BP
  1084. MOVL BP, DX
  1085. SUBL $0x01, DX
  1086. JC emit_literal_done_emit_remainder_encodeBlockAsm
  1087. CMPL DX, $0x3c
  1088. JLT one_byte_emit_remainder_encodeBlockAsm
  1089. CMPL DX, $0x00000100
  1090. JLT two_bytes_emit_remainder_encodeBlockAsm
  1091. CMPL DX, $0x00010000
  1092. JLT three_bytes_emit_remainder_encodeBlockAsm
  1093. CMPL DX, $0x01000000
  1094. JLT four_bytes_emit_remainder_encodeBlockAsm
  1095. MOVB $0xfc, (AX)
  1096. MOVL DX, 1(AX)
  1097. ADDQ $0x05, AX
  1098. JMP memmove_emit_remainder_encodeBlockAsm
  1099. four_bytes_emit_remainder_encodeBlockAsm:
  1100. MOVL DX, BX
  1101. SHRL $0x10, BX
  1102. MOVB $0xf8, (AX)
  1103. MOVW DX, 1(AX)
  1104. MOVB BL, 3(AX)
  1105. ADDQ $0x04, AX
  1106. JMP memmove_emit_remainder_encodeBlockAsm
  1107. three_bytes_emit_remainder_encodeBlockAsm:
  1108. MOVB $0xf4, (AX)
  1109. MOVW DX, 1(AX)
  1110. ADDQ $0x03, AX
  1111. JMP memmove_emit_remainder_encodeBlockAsm
  1112. two_bytes_emit_remainder_encodeBlockAsm:
  1113. MOVB $0xf0, (AX)
  1114. MOVB DL, 1(AX)
  1115. ADDQ $0x02, AX
  1116. JMP memmove_emit_remainder_encodeBlockAsm
  1117. one_byte_emit_remainder_encodeBlockAsm:
  1118. SHLB $0x02, DL
  1119. MOVB DL, (AX)
  1120. ADDQ $0x01, AX
  1121. memmove_emit_remainder_encodeBlockAsm:
  1122. LEAQ (AX)(BP*1), DX
  1123. MOVL BP, BX
  1124. NOP
  1125. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail:
  1126. TESTQ BX, BX
  1127. JEQ memmove_end_copy_emit_remainder_encodeBlockAsm
  1128. CMPQ BX, $0x02
  1129. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
  1130. CMPQ BX, $0x04
  1131. JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
  1132. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4
  1133. CMPQ BX, $0x08
  1134. JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7
  1135. JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8
  1136. CMPQ BX, $0x10
  1137. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16
  1138. CMPQ BX, $0x20
  1139. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
  1140. CMPQ BX, $0x40
  1141. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
  1142. CMPQ BX, $0x80
  1143. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128
  1144. CMPQ BX, $0x00000100
  1145. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256
  1146. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048
  1147. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
  1148. MOVB (CX), BP
  1149. MOVB -1(CX)(BX*1), CL
  1150. MOVB BP, (AX)
  1151. MOVB CL, -1(AX)(BX*1)
  1152. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1153. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4:
  1154. MOVL (CX), BP
  1155. MOVL BP, (AX)
  1156. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1157. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
  1158. MOVW (CX), BP
  1159. MOVB 2(CX), CL
  1160. MOVW BP, (AX)
  1161. MOVB CL, 2(AX)
  1162. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1163. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7:
  1164. MOVL (CX), BP
  1165. MOVL -4(CX)(BX*1), CX
  1166. MOVL BP, (AX)
  1167. MOVL CX, -4(AX)(BX*1)
  1168. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1169. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8:
  1170. MOVQ (CX), BP
  1171. MOVQ BP, (AX)
  1172. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1173. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16:
  1174. MOVQ (CX), BP
  1175. MOVQ -8(CX)(BX*1), CX
  1176. MOVQ BP, (AX)
  1177. MOVQ CX, -8(AX)(BX*1)
  1178. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1179. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
  1180. MOVOU (CX), X0
  1181. MOVOU -16(CX)(BX*1), X1
  1182. MOVOU X0, (AX)
  1183. MOVOU X1, -16(AX)(BX*1)
  1184. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1185. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
  1186. MOVOU (CX), X0
  1187. MOVOU 16(CX), X1
  1188. MOVOU -32(CX)(BX*1), X2
  1189. MOVOU -16(CX)(BX*1), X3
  1190. MOVOU X0, (AX)
  1191. MOVOU X1, 16(AX)
  1192. MOVOU X2, -32(AX)(BX*1)
  1193. MOVOU X3, -16(AX)(BX*1)
  1194. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1195. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128:
  1196. MOVOU (CX), X0
  1197. MOVOU 16(CX), X1
  1198. MOVOU 32(CX), X2
  1199. MOVOU 48(CX), X3
  1200. MOVOU -64(CX)(BX*1), X12
  1201. MOVOU -48(CX)(BX*1), X13
  1202. MOVOU -32(CX)(BX*1), X14
  1203. MOVOU -16(CX)(BX*1), X15
  1204. MOVOU X0, (AX)
  1205. MOVOU X1, 16(AX)
  1206. MOVOU X2, 32(AX)
  1207. MOVOU X3, 48(AX)
  1208. MOVOU X12, -64(AX)(BX*1)
  1209. MOVOU X13, -48(AX)(BX*1)
  1210. MOVOU X14, -32(AX)(BX*1)
  1211. MOVOU X15, -16(AX)(BX*1)
  1212. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1213. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256:
  1214. MOVOU (CX), X0
  1215. MOVOU 16(CX), X1
  1216. MOVOU 32(CX), X2
  1217. MOVOU 48(CX), X3
  1218. MOVOU 64(CX), X4
  1219. MOVOU 80(CX), X5
  1220. MOVOU 96(CX), X6
  1221. MOVOU 112(CX), X7
  1222. MOVOU -128(CX)(BX*1), X8
  1223. MOVOU -112(CX)(BX*1), X9
  1224. MOVOU -96(CX)(BX*1), X10
  1225. MOVOU -80(CX)(BX*1), X11
  1226. MOVOU -64(CX)(BX*1), X12
  1227. MOVOU -48(CX)(BX*1), X13
  1228. MOVOU -32(CX)(BX*1), X14
  1229. MOVOU -16(CX)(BX*1), X15
  1230. MOVOU X0, (AX)
  1231. MOVOU X1, 16(AX)
  1232. MOVOU X2, 32(AX)
  1233. MOVOU X3, 48(AX)
  1234. MOVOU X4, 64(AX)
  1235. MOVOU X5, 80(AX)
  1236. MOVOU X6, 96(AX)
  1237. MOVOU X7, 112(AX)
  1238. MOVOU X8, -128(AX)(BX*1)
  1239. MOVOU X9, -112(AX)(BX*1)
  1240. MOVOU X10, -96(AX)(BX*1)
  1241. MOVOU X11, -80(AX)(BX*1)
  1242. MOVOU X12, -64(AX)(BX*1)
  1243. MOVOU X13, -48(AX)(BX*1)
  1244. MOVOU X14, -32(AX)(BX*1)
  1245. MOVOU X15, -16(AX)(BX*1)
  1246. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1247. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048:
  1248. LEAQ -256(BX), BX
  1249. MOVOU (CX), X0
  1250. MOVOU 16(CX), X1
  1251. MOVOU 32(CX), X2
  1252. MOVOU 48(CX), X3
  1253. MOVOU 64(CX), X4
  1254. MOVOU 80(CX), X5
  1255. MOVOU 96(CX), X6
  1256. MOVOU 112(CX), X7
  1257. MOVOU 128(CX), X8
  1258. MOVOU 144(CX), X9
  1259. MOVOU 160(CX), X10
  1260. MOVOU 176(CX), X11
  1261. MOVOU 192(CX), X12
  1262. MOVOU 208(CX), X13
  1263. MOVOU 224(CX), X14
  1264. MOVOU 240(CX), X15
  1265. MOVOU X0, (AX)
  1266. MOVOU X1, 16(AX)
  1267. MOVOU X2, 32(AX)
  1268. MOVOU X3, 48(AX)
  1269. MOVOU X4, 64(AX)
  1270. MOVOU X5, 80(AX)
  1271. MOVOU X6, 96(AX)
  1272. MOVOU X7, 112(AX)
  1273. MOVOU X8, 128(AX)
  1274. MOVOU X9, 144(AX)
  1275. MOVOU X10, 160(AX)
  1276. MOVOU X11, 176(AX)
  1277. MOVOU X12, 192(AX)
  1278. MOVOU X13, 208(AX)
  1279. MOVOU X14, 224(AX)
  1280. MOVOU X15, 240(AX)
  1281. CMPQ BX, $0x00000100
  1282. LEAQ 256(CX), CX
  1283. LEAQ 256(AX), AX
  1284. JGE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048
  1285. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail
  1286. memmove_end_copy_emit_remainder_encodeBlockAsm:
  1287. MOVQ DX, AX
  1288. emit_literal_done_emit_remainder_encodeBlockAsm:
  1289. MOVQ dst_base+0(FP), CX
  1290. SUBQ CX, AX
  1291. MOVQ AX, ret+48(FP)
  1292. RET
  1293. // func encodeBlockAsm12B(dst []byte, src []byte) int
  1294. // Requires: SSE2
  1295. TEXT ·encodeBlockAsm12B(SB), $16408-56
  1296. MOVQ dst_base+0(FP), AX
  1297. MOVQ $0x00000080, CX
  1298. LEAQ 24(SP), DX
  1299. PXOR X0, X0
  1300. zero_loop_encodeBlockAsm12B:
  1301. MOVOU X0, (DX)
  1302. MOVOU X0, 16(DX)
  1303. MOVOU X0, 32(DX)
  1304. MOVOU X0, 48(DX)
  1305. MOVOU X0, 64(DX)
  1306. MOVOU X0, 80(DX)
  1307. MOVOU X0, 96(DX)
  1308. MOVOU X0, 112(DX)
  1309. ADDQ $0x80, DX
  1310. DECQ CX
  1311. JNZ zero_loop_encodeBlockAsm12B
  1312. MOVL $0x00000000, 12(SP)
  1313. MOVQ src_len+32(FP), CX
  1314. LEAQ -5(CX), DX
  1315. LEAQ -8(CX), BP
  1316. MOVL BP, 8(SP)
  1317. SHRQ $0x05, CX
  1318. SUBL CX, DX
  1319. LEAQ (AX)(DX*1), DX
  1320. MOVQ DX, (SP)
  1321. MOVL $0x00000001, CX
  1322. MOVL CX, 16(SP)
  1323. MOVQ src_base+24(FP), DX
  1324. search_loop_encodeBlockAsm12B:
  1325. MOVQ (DX)(CX*1), SI
  1326. MOVL CX, BP
  1327. SUBL 12(SP), BP
  1328. SHRL $0x05, BP
  1329. LEAL 4(CX)(BP*1), BP
  1330. MOVL 8(SP), DI
  1331. CMPL BP, DI
  1332. JGT emit_remainder_encodeBlockAsm12B
  1333. MOVL BP, 20(SP)
  1334. MOVQ $0x000000cf1bbcdcbb, R8
  1335. MOVQ SI, R9
  1336. MOVQ SI, R10
  1337. SHRQ $0x08, R10
  1338. SHLQ $0x18, R9
  1339. IMULQ R8, R9
  1340. SHRQ $0x34, R9
  1341. SHLQ $0x18, R10
  1342. IMULQ R8, R10
  1343. SHRQ $0x34, R10
  1344. MOVL 24(SP)(R9*4), BP
  1345. MOVL 24(SP)(R10*4), DI
  1346. MOVL CX, 24(SP)(R9*4)
  1347. LEAL 1(CX), R9
  1348. MOVL R9, 24(SP)(R10*4)
  1349. MOVQ SI, R9
  1350. SHRQ $0x10, R9
  1351. SHLQ $0x18, R9
  1352. IMULQ R8, R9
  1353. SHRQ $0x34, R9
  1354. MOVL CX, R8
  1355. SUBL 16(SP), R8
  1356. MOVL 1(DX)(R8*1), R10
  1357. MOVQ SI, R8
  1358. SHRQ $0x08, R8
  1359. CMPL R8, R10
  1360. JNE no_repeat_found_encodeBlockAsm12B
  1361. LEAL 1(CX), SI
  1362. MOVL 12(SP), DI
  1363. MOVL SI, BP
  1364. SUBL 16(SP), BP
  1365. JZ repeat_extend_back_end_encodeBlockAsm12B
  1366. repeat_extend_back_loop_encodeBlockAsm12B:
  1367. CMPL SI, DI
  1368. JLE repeat_extend_back_end_encodeBlockAsm12B
  1369. MOVB -1(DX)(BP*1), BL
  1370. MOVB -1(DX)(SI*1), R8
  1371. CMPB BL, R8
  1372. JNE repeat_extend_back_end_encodeBlockAsm12B
  1373. LEAL -1(SI), SI
  1374. DECL BP
  1375. JNZ repeat_extend_back_loop_encodeBlockAsm12B
  1376. repeat_extend_back_end_encodeBlockAsm12B:
  1377. MOVL 12(SP), BP
  1378. CMPL BP, SI
  1379. JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B
  1380. MOVL SI, R8
  1381. MOVL SI, 12(SP)
  1382. LEAQ (DX)(BP*1), R9
  1383. SUBL BP, R8
  1384. MOVL R8, BP
  1385. SUBL $0x01, BP
  1386. JC emit_literal_done_repeat_emit_encodeBlockAsm12B
  1387. CMPL BP, $0x3c
  1388. JLT one_byte_repeat_emit_encodeBlockAsm12B
  1389. CMPL BP, $0x00000100
  1390. JLT two_bytes_repeat_emit_encodeBlockAsm12B
  1391. CMPL BP, $0x00010000
  1392. JLT three_bytes_repeat_emit_encodeBlockAsm12B
  1393. CMPL BP, $0x01000000
  1394. JLT four_bytes_repeat_emit_encodeBlockAsm12B
  1395. MOVB $0xfc, (AX)
  1396. MOVL BP, 1(AX)
  1397. ADDQ $0x05, AX
  1398. JMP memmove_repeat_emit_encodeBlockAsm12B
  1399. four_bytes_repeat_emit_encodeBlockAsm12B:
  1400. MOVL BP, R10
  1401. SHRL $0x10, R10
  1402. MOVB $0xf8, (AX)
  1403. MOVW BP, 1(AX)
  1404. MOVB R10, 3(AX)
  1405. ADDQ $0x04, AX
  1406. JMP memmove_repeat_emit_encodeBlockAsm12B
  1407. three_bytes_repeat_emit_encodeBlockAsm12B:
  1408. MOVB $0xf4, (AX)
  1409. MOVW BP, 1(AX)
  1410. ADDQ $0x03, AX
  1411. JMP memmove_repeat_emit_encodeBlockAsm12B
  1412. two_bytes_repeat_emit_encodeBlockAsm12B:
  1413. MOVB $0xf0, (AX)
  1414. MOVB BP, 1(AX)
  1415. ADDQ $0x02, AX
  1416. JMP memmove_repeat_emit_encodeBlockAsm12B
  1417. one_byte_repeat_emit_encodeBlockAsm12B:
  1418. SHLB $0x02, BP
  1419. MOVB BP, (AX)
  1420. ADDQ $0x01, AX
  1421. memmove_repeat_emit_encodeBlockAsm12B:
  1422. LEAQ (AX)(R8*1), BP
  1423. NOP
  1424. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail:
  1425. TESTQ R8, R8
  1426. JEQ memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1427. CMPQ R8, $0x02
  1428. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2
  1429. CMPQ R8, $0x04
  1430. JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3
  1431. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4
  1432. CMPQ R8, $0x08
  1433. JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7
  1434. JE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
  1435. CMPQ R8, $0x10
  1436. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16
  1437. CMPQ R8, $0x20
  1438. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
  1439. CMPQ R8, $0x40
  1440. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
  1441. CMPQ R8, $0x80
  1442. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128
  1443. CMPQ R8, $0x00000100
  1444. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256
  1445. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048
  1446. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2:
  1447. MOVB (R9), R10
  1448. MOVB -1(R9)(R8*1), R9
  1449. MOVB R10, (AX)
  1450. MOVB R9, -1(AX)(R8*1)
  1451. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1452. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4:
  1453. MOVL (R9), R10
  1454. MOVL R10, (AX)
  1455. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1456. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3:
  1457. MOVW (R9), R10
  1458. MOVB 2(R9), R9
  1459. MOVW R10, (AX)
  1460. MOVB R9, 2(AX)
  1461. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1462. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7:
  1463. MOVL (R9), R10
  1464. MOVL -4(R9)(R8*1), R9
  1465. MOVL R10, (AX)
  1466. MOVL R9, -4(AX)(R8*1)
  1467. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1468. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
  1469. MOVQ (R9), R10
  1470. MOVQ R10, (AX)
  1471. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1472. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16:
  1473. MOVQ (R9), R10
  1474. MOVQ -8(R9)(R8*1), R9
  1475. MOVQ R10, (AX)
  1476. MOVQ R9, -8(AX)(R8*1)
  1477. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1478. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
  1479. MOVOU (R9), X0
  1480. MOVOU -16(R9)(R8*1), X1
  1481. MOVOU X0, (AX)
  1482. MOVOU X1, -16(AX)(R8*1)
  1483. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1484. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
  1485. MOVOU (R9), X0
  1486. MOVOU 16(R9), X1
  1487. MOVOU -32(R9)(R8*1), X2
  1488. MOVOU -16(R9)(R8*1), X3
  1489. MOVOU X0, (AX)
  1490. MOVOU X1, 16(AX)
  1491. MOVOU X2, -32(AX)(R8*1)
  1492. MOVOU X3, -16(AX)(R8*1)
  1493. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1494. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128:
  1495. MOVOU (R9), X0
  1496. MOVOU 16(R9), X1
  1497. MOVOU 32(R9), X2
  1498. MOVOU 48(R9), X3
  1499. MOVOU -64(R9)(R8*1), X12
  1500. MOVOU -48(R9)(R8*1), X13
  1501. MOVOU -32(R9)(R8*1), X14
  1502. MOVOU -16(R9)(R8*1), X15
  1503. MOVOU X0, (AX)
  1504. MOVOU X1, 16(AX)
  1505. MOVOU X2, 32(AX)
  1506. MOVOU X3, 48(AX)
  1507. MOVOU X12, -64(AX)(R8*1)
  1508. MOVOU X13, -48(AX)(R8*1)
  1509. MOVOU X14, -32(AX)(R8*1)
  1510. MOVOU X15, -16(AX)(R8*1)
  1511. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1512. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256:
  1513. MOVOU (R9), X0
  1514. MOVOU 16(R9), X1
  1515. MOVOU 32(R9), X2
  1516. MOVOU 48(R9), X3
  1517. MOVOU 64(R9), X4
  1518. MOVOU 80(R9), X5
  1519. MOVOU 96(R9), X6
  1520. MOVOU 112(R9), X7
  1521. MOVOU -128(R9)(R8*1), X8
  1522. MOVOU -112(R9)(R8*1), X9
  1523. MOVOU -96(R9)(R8*1), X10
  1524. MOVOU -80(R9)(R8*1), X11
  1525. MOVOU -64(R9)(R8*1), X12
  1526. MOVOU -48(R9)(R8*1), X13
  1527. MOVOU -32(R9)(R8*1), X14
  1528. MOVOU -16(R9)(R8*1), X15
  1529. MOVOU X0, (AX)
  1530. MOVOU X1, 16(AX)
  1531. MOVOU X2, 32(AX)
  1532. MOVOU X3, 48(AX)
  1533. MOVOU X4, 64(AX)
  1534. MOVOU X5, 80(AX)
  1535. MOVOU X6, 96(AX)
  1536. MOVOU X7, 112(AX)
  1537. MOVOU X8, -128(AX)(R8*1)
  1538. MOVOU X9, -112(AX)(R8*1)
  1539. MOVOU X10, -96(AX)(R8*1)
  1540. MOVOU X11, -80(AX)(R8*1)
  1541. MOVOU X12, -64(AX)(R8*1)
  1542. MOVOU X13, -48(AX)(R8*1)
  1543. MOVOU X14, -32(AX)(R8*1)
  1544. MOVOU X15, -16(AX)(R8*1)
  1545. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  1546. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048:
  1547. LEAQ -256(R8), R8
  1548. MOVOU (R9), X0
  1549. MOVOU 16(R9), X1
  1550. MOVOU 32(R9), X2
  1551. MOVOU 48(R9), X3
  1552. MOVOU 64(R9), X4
  1553. MOVOU 80(R9), X5
  1554. MOVOU 96(R9), X6
  1555. MOVOU 112(R9), X7
  1556. MOVOU 128(R9), X8
  1557. MOVOU 144(R9), X9
  1558. MOVOU 160(R9), X10
  1559. MOVOU 176(R9), X11
  1560. MOVOU 192(R9), X12
  1561. MOVOU 208(R9), X13
  1562. MOVOU 224(R9), X14
  1563. MOVOU 240(R9), X15
  1564. MOVOU X0, (AX)
  1565. MOVOU X1, 16(AX)
  1566. MOVOU X2, 32(AX)
  1567. MOVOU X3, 48(AX)
  1568. MOVOU X4, 64(AX)
  1569. MOVOU X5, 80(AX)
  1570. MOVOU X6, 96(AX)
  1571. MOVOU X7, 112(AX)
  1572. MOVOU X8, 128(AX)
  1573. MOVOU X9, 144(AX)
  1574. MOVOU X10, 160(AX)
  1575. MOVOU X11, 176(AX)
  1576. MOVOU X12, 192(AX)
  1577. MOVOU X13, 208(AX)
  1578. MOVOU X14, 224(AX)
  1579. MOVOU X15, 240(AX)
  1580. CMPQ R8, $0x00000100
  1581. LEAQ 256(R9), R9
  1582. LEAQ 256(AX), AX
  1583. JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048
  1584. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail
  1585. memmove_end_copy_repeat_emit_encodeBlockAsm12B:
  1586. MOVQ BP, AX
  1587. emit_literal_done_repeat_emit_encodeBlockAsm12B:
  1588. ADDL $0x05, CX
  1589. MOVL CX, BP
  1590. SUBL 16(SP), BP
  1591. MOVQ src_len+32(FP), R8
  1592. SUBL CX, R8
  1593. LEAQ (DX)(CX*1), R9
  1594. LEAQ (DX)(BP*1), BP
  1595. XORL R11, R11
  1596. CMPL R8, $0x08
  1597. JL matchlen_single_repeat_extend
  1598. matchlen_loopback_repeat_extend:
  1599. MOVQ (R9)(R11*1), R10
  1600. XORQ (BP)(R11*1), R10
  1601. TESTQ R10, R10
  1602. JZ matchlen_loop_repeat_extend
  1603. BSFQ R10, R10
  1604. SARQ $0x03, R10
  1605. LEAL (R11)(R10*1), R11
  1606. JMP repeat_extend_forward_end_encodeBlockAsm12B
  1607. matchlen_loop_repeat_extend:
  1608. LEAL -8(R8), R8
  1609. LEAL 8(R11), R11
  1610. CMPL R8, $0x08
  1611. JGE matchlen_loopback_repeat_extend
  1612. matchlen_single_repeat_extend:
  1613. TESTL R8, R8
  1614. JZ repeat_extend_forward_end_encodeBlockAsm12B
  1615. matchlen_single_loopback_repeat_extend:
  1616. MOVB (R9)(R11*1), R10
  1617. CMPB (BP)(R11*1), R10
  1618. JNE repeat_extend_forward_end_encodeBlockAsm12B
  1619. LEAL 1(R11), R11
  1620. DECL R8
  1621. JNZ matchlen_single_loopback_repeat_extend
  1622. repeat_extend_forward_end_encodeBlockAsm12B:
  1623. ADDL R11, CX
  1624. MOVL CX, BP
  1625. SUBL SI, BP
  1626. MOVL 16(SP), SI
  1627. TESTL DI, DI
  1628. JZ repeat_as_copy_encodeBlockAsm12B
  1629. emit_repeat_again_match_repeat_encodeBlockAsm12B:
  1630. MOVL BP, DI
  1631. LEAL -4(BP), BP
  1632. CMPL DI, $0x08
  1633. JLE repeat_two_match_repeat_encodeBlockAsm12B
  1634. CMPL DI, $0x0c
  1635. JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
  1636. CMPL SI, $0x00000800
  1637. JLT repeat_two_offset_match_repeat_encodeBlockAsm12B
  1638. cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
  1639. CMPL BP, $0x00000104
  1640. JLT repeat_three_match_repeat_encodeBlockAsm12B
  1641. CMPL BP, $0x00010100
  1642. JLT repeat_four_match_repeat_encodeBlockAsm12B
  1643. CMPL BP, $0x0100ffff
  1644. JLT repeat_five_match_repeat_encodeBlockAsm12B
  1645. LEAL -16842747(BP), BP
  1646. MOVW $0x001d, (AX)
  1647. MOVW $0xfffb, 2(AX)
  1648. MOVB $0xff, 4(AX)
  1649. ADDQ $0x05, AX
  1650. JMP emit_repeat_again_match_repeat_encodeBlockAsm12B
  1651. repeat_five_match_repeat_encodeBlockAsm12B:
  1652. LEAL -65536(BP), BP
  1653. MOVL BP, SI
  1654. MOVW $0x001d, (AX)
  1655. MOVW BP, 2(AX)
  1656. SARL $0x10, SI
  1657. MOVB SI, 4(AX)
  1658. ADDQ $0x05, AX
  1659. JMP repeat_end_emit_encodeBlockAsm12B
  1660. repeat_four_match_repeat_encodeBlockAsm12B:
  1661. LEAL -256(BP), BP
  1662. MOVW $0x0019, (AX)
  1663. MOVW BP, 2(AX)
  1664. ADDQ $0x04, AX
  1665. JMP repeat_end_emit_encodeBlockAsm12B
  1666. repeat_three_match_repeat_encodeBlockAsm12B:
  1667. LEAL -4(BP), BP
  1668. MOVW $0x0015, (AX)
  1669. MOVB BP, 2(AX)
  1670. ADDQ $0x03, AX
  1671. JMP repeat_end_emit_encodeBlockAsm12B
  1672. repeat_two_match_repeat_encodeBlockAsm12B:
  1673. SHLL $0x02, BP
  1674. ORL $0x01, BP
  1675. MOVW BP, (AX)
  1676. ADDQ $0x02, AX
  1677. JMP repeat_end_emit_encodeBlockAsm12B
  1678. repeat_two_offset_match_repeat_encodeBlockAsm12B:
  1679. XORQ DI, DI
  1680. LEAL 1(DI)(BP*4), BP
  1681. MOVB SI, 1(AX)
  1682. SARL $0x08, SI
  1683. SHLL $0x05, SI
  1684. ORL SI, BP
  1685. MOVB BP, (AX)
  1686. ADDQ $0x02, AX
  1687. JMP repeat_end_emit_encodeBlockAsm12B
  1688. repeat_as_copy_encodeBlockAsm12B:
  1689. CMPL SI, $0x00010000
  1690. JL two_byte_offset_repeat_as_copy_encodeBlockAsm12B
  1691. four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12B:
  1692. CMPL BP, $0x40
  1693. JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12B
  1694. MOVB $0xff, (AX)
  1695. MOVL SI, 1(AX)
  1696. LEAL -64(BP), BP
  1697. ADDQ $0x05, AX
  1698. CMPL BP, $0x04
  1699. JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12B
  1700. emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy:
  1701. MOVL BP, DI
  1702. LEAL -4(BP), BP
  1703. CMPL DI, $0x08
  1704. JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy
  1705. CMPL DI, $0x0c
  1706. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
  1707. CMPL SI, $0x00000800
  1708. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
  1709. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
  1710. CMPL BP, $0x00000104
  1711. JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy
  1712. CMPL BP, $0x00010100
  1713. JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy
  1714. CMPL BP, $0x0100ffff
  1715. JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy
  1716. LEAL -16842747(BP), BP
  1717. MOVW $0x001d, (AX)
  1718. MOVW $0xfffb, 2(AX)
  1719. MOVB $0xff, 4(AX)
  1720. ADDQ $0x05, AX
  1721. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy
  1722. repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy:
  1723. LEAL -65536(BP), BP
  1724. MOVL BP, SI
  1725. MOVW $0x001d, (AX)
  1726. MOVW BP, 2(AX)
  1727. SARL $0x10, SI
  1728. MOVB SI, 4(AX)
  1729. ADDQ $0x05, AX
  1730. JMP repeat_end_emit_encodeBlockAsm12B
  1731. repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy:
  1732. LEAL -256(BP), BP
  1733. MOVW $0x0019, (AX)
  1734. MOVW BP, 2(AX)
  1735. ADDQ $0x04, AX
  1736. JMP repeat_end_emit_encodeBlockAsm12B
  1737. repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy:
  1738. LEAL -4(BP), BP
  1739. MOVW $0x0015, (AX)
  1740. MOVB BP, 2(AX)
  1741. ADDQ $0x03, AX
  1742. JMP repeat_end_emit_encodeBlockAsm12B
  1743. repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy:
  1744. SHLL $0x02, BP
  1745. ORL $0x01, BP
  1746. MOVW BP, (AX)
  1747. ADDQ $0x02, AX
  1748. JMP repeat_end_emit_encodeBlockAsm12B
  1749. repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
  1750. XORQ DI, DI
  1751. LEAL 1(DI)(BP*4), BP
  1752. MOVB SI, 1(AX)
  1753. SARL $0x08, SI
  1754. SHLL $0x05, SI
  1755. ORL SI, BP
  1756. MOVB BP, (AX)
  1757. ADDQ $0x02, AX
  1758. JMP repeat_end_emit_encodeBlockAsm12B
  1759. JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12B
  1760. four_bytes_remain_repeat_as_copy_encodeBlockAsm12B:
  1761. TESTL BP, BP
  1762. JZ repeat_end_emit_encodeBlockAsm12B
  1763. MOVB $0x03, BL
  1764. LEAL -4(BX)(BP*4), BP
  1765. MOVB BP, (AX)
  1766. MOVL SI, 1(AX)
  1767. ADDQ $0x05, AX
  1768. JMP repeat_end_emit_encodeBlockAsm12B
  1769. two_byte_offset_repeat_as_copy_encodeBlockAsm12B:
  1770. CMPL BP, $0x40
  1771. JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
  1772. MOVB $0xee, (AX)
  1773. MOVW SI, 1(AX)
  1774. LEAL -60(BP), BP
  1775. ADDQ $0x03, AX
  1776. emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  1777. MOVL BP, DI
  1778. LEAL -4(BP), BP
  1779. CMPL DI, $0x08
  1780. JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  1781. CMPL DI, $0x0c
  1782. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  1783. CMPL SI, $0x00000800
  1784. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  1785. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  1786. CMPL BP, $0x00000104
  1787. JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  1788. CMPL BP, $0x00010100
  1789. JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  1790. CMPL BP, $0x0100ffff
  1791. JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  1792. LEAL -16842747(BP), BP
  1793. MOVW $0x001d, (AX)
  1794. MOVW $0xfffb, 2(AX)
  1795. MOVB $0xff, 4(AX)
  1796. ADDQ $0x05, AX
  1797. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  1798. repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  1799. LEAL -65536(BP), BP
  1800. MOVL BP, SI
  1801. MOVW $0x001d, (AX)
  1802. MOVW BP, 2(AX)
  1803. SARL $0x10, SI
  1804. MOVB SI, 4(AX)
  1805. ADDQ $0x05, AX
  1806. JMP repeat_end_emit_encodeBlockAsm12B
  1807. repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  1808. LEAL -256(BP), BP
  1809. MOVW $0x0019, (AX)
  1810. MOVW BP, 2(AX)
  1811. ADDQ $0x04, AX
  1812. JMP repeat_end_emit_encodeBlockAsm12B
  1813. repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  1814. LEAL -4(BP), BP
  1815. MOVW $0x0015, (AX)
  1816. MOVB BP, 2(AX)
  1817. ADDQ $0x03, AX
  1818. JMP repeat_end_emit_encodeBlockAsm12B
  1819. repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  1820. SHLL $0x02, BP
  1821. ORL $0x01, BP
  1822. MOVW BP, (AX)
  1823. ADDQ $0x02, AX
  1824. JMP repeat_end_emit_encodeBlockAsm12B
  1825. repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  1826. XORQ DI, DI
  1827. LEAL 1(DI)(BP*4), BP
  1828. MOVB SI, 1(AX)
  1829. SARL $0x08, SI
  1830. SHLL $0x05, SI
  1831. ORL SI, BP
  1832. MOVB BP, (AX)
  1833. ADDQ $0x02, AX
  1834. JMP repeat_end_emit_encodeBlockAsm12B
  1835. JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B
  1836. two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
  1837. CMPL BP, $0x0c
  1838. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
  1839. CMPL SI, $0x00000800
  1840. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
  1841. MOVB $0x01, BL
  1842. LEAL -16(BX)(BP*4), BP
  1843. MOVB SI, 1(AX)
  1844. SHRL $0x08, SI
  1845. SHLL $0x05, SI
  1846. ORL SI, BP
  1847. MOVB BP, (AX)
  1848. ADDQ $0x02, AX
  1849. JMP repeat_end_emit_encodeBlockAsm12B
  1850. emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
  1851. MOVB $0x02, BL
  1852. LEAL -4(BX)(BP*4), BP
  1853. MOVB BP, (AX)
  1854. MOVW SI, 1(AX)
  1855. ADDQ $0x03, AX
  1856. repeat_end_emit_encodeBlockAsm12B:
  1857. MOVL CX, 12(SP)
  1858. CMPL CX, 8(SP)
  1859. JGE emit_remainder_encodeBlockAsm12B
  1860. JMP search_loop_encodeBlockAsm12B
  1861. no_repeat_found_encodeBlockAsm12B:
  1862. CMPL (DX)(BP*1), SI
  1863. JEQ candidate_match_encodeBlockAsm12B
  1864. SHRQ $0x08, SI
  1865. MOVL 24(SP)(R9*4), BP
  1866. LEAL 2(CX), R8
  1867. CMPL (DX)(DI*1), SI
  1868. JEQ candidate2_match_encodeBlockAsm12B
  1869. MOVL R8, 24(SP)(R9*4)
  1870. SHRQ $0x08, SI
  1871. CMPL (DX)(BP*1), SI
  1872. JEQ candidate3_match_encodeBlockAsm12B
  1873. MOVL 20(SP), CX
  1874. JMP search_loop_encodeBlockAsm12B
  1875. candidate3_match_encodeBlockAsm12B:
  1876. ADDL $0x02, CX
  1877. JMP candidate_match_encodeBlockAsm12B
  1878. candidate2_match_encodeBlockAsm12B:
  1879. MOVL R8, 24(SP)(R9*4)
  1880. INCL CX
  1881. MOVL DI, BP
  1882. candidate_match_encodeBlockAsm12B:
  1883. MOVL 12(SP), SI
  1884. TESTL BP, BP
  1885. JZ match_extend_back_end_encodeBlockAsm12B
  1886. match_extend_back_loop_encodeBlockAsm12B:
  1887. CMPL CX, SI
  1888. JLE match_extend_back_end_encodeBlockAsm12B
  1889. MOVB -1(DX)(BP*1), BL
  1890. MOVB -1(DX)(CX*1), DI
  1891. CMPB BL, DI
  1892. JNE match_extend_back_end_encodeBlockAsm12B
  1893. LEAL -1(CX), CX
  1894. DECL BP
  1895. JZ match_extend_back_end_encodeBlockAsm12B
  1896. JMP match_extend_back_loop_encodeBlockAsm12B
  1897. match_extend_back_end_encodeBlockAsm12B:
  1898. MOVL CX, SI
  1899. SUBL 12(SP), SI
  1900. LEAQ 4(AX)(SI*1), SI
  1901. CMPQ SI, (SP)
  1902. JL match_dst_size_check_encodeBlockAsm12B
  1903. MOVQ $0x00000000, ret+48(FP)
  1904. RET
  1905. match_dst_size_check_encodeBlockAsm12B:
  1906. MOVL CX, SI
  1907. MOVL 12(SP), DI
  1908. CMPL DI, SI
  1909. JEQ emit_literal_done_match_emit_encodeBlockAsm12B
  1910. MOVL SI, R8
  1911. MOVL SI, 12(SP)
  1912. LEAQ (DX)(DI*1), SI
  1913. SUBL DI, R8
  1914. MOVL R8, DI
  1915. SUBL $0x01, DI
  1916. JC emit_literal_done_match_emit_encodeBlockAsm12B
  1917. CMPL DI, $0x3c
  1918. JLT one_byte_match_emit_encodeBlockAsm12B
  1919. CMPL DI, $0x00000100
  1920. JLT two_bytes_match_emit_encodeBlockAsm12B
  1921. CMPL DI, $0x00010000
  1922. JLT three_bytes_match_emit_encodeBlockAsm12B
  1923. CMPL DI, $0x01000000
  1924. JLT four_bytes_match_emit_encodeBlockAsm12B
  1925. MOVB $0xfc, (AX)
  1926. MOVL DI, 1(AX)
  1927. ADDQ $0x05, AX
  1928. JMP memmove_match_emit_encodeBlockAsm12B
  1929. four_bytes_match_emit_encodeBlockAsm12B:
  1930. MOVL DI, R9
  1931. SHRL $0x10, R9
  1932. MOVB $0xf8, (AX)
  1933. MOVW DI, 1(AX)
  1934. MOVB R9, 3(AX)
  1935. ADDQ $0x04, AX
  1936. JMP memmove_match_emit_encodeBlockAsm12B
  1937. three_bytes_match_emit_encodeBlockAsm12B:
  1938. MOVB $0xf4, (AX)
  1939. MOVW DI, 1(AX)
  1940. ADDQ $0x03, AX
  1941. JMP memmove_match_emit_encodeBlockAsm12B
  1942. two_bytes_match_emit_encodeBlockAsm12B:
  1943. MOVB $0xf0, (AX)
  1944. MOVB DI, 1(AX)
  1945. ADDQ $0x02, AX
  1946. JMP memmove_match_emit_encodeBlockAsm12B
  1947. one_byte_match_emit_encodeBlockAsm12B:
  1948. SHLB $0x02, DI
  1949. MOVB DI, (AX)
  1950. ADDQ $0x01, AX
  1951. memmove_match_emit_encodeBlockAsm12B:
  1952. LEAQ (AX)(R8*1), DI
  1953. NOP
  1954. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail:
  1955. TESTQ R8, R8
  1956. JEQ memmove_end_copy_match_emit_encodeBlockAsm12B
  1957. CMPQ R8, $0x02
  1958. JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2
  1959. CMPQ R8, $0x04
  1960. JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3
  1961. JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4
  1962. CMPQ R8, $0x08
  1963. JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7
  1964. JE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
  1965. CMPQ R8, $0x10
  1966. JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16
  1967. CMPQ R8, $0x20
  1968. JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
  1969. CMPQ R8, $0x40
  1970. JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
  1971. CMPQ R8, $0x80
  1972. JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128
  1973. CMPQ R8, $0x00000100
  1974. JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256
  1975. JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048
  1976. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2:
  1977. MOVB (SI), R9
  1978. MOVB -1(SI)(R8*1), SI
  1979. MOVB R9, (AX)
  1980. MOVB SI, -1(AX)(R8*1)
  1981. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  1982. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4:
  1983. MOVL (SI), R9
  1984. MOVL R9, (AX)
  1985. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  1986. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3:
  1987. MOVW (SI), R9
  1988. MOVB 2(SI), SI
  1989. MOVW R9, (AX)
  1990. MOVB SI, 2(AX)
  1991. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  1992. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7:
  1993. MOVL (SI), R9
  1994. MOVL -4(SI)(R8*1), SI
  1995. MOVL R9, (AX)
  1996. MOVL SI, -4(AX)(R8*1)
  1997. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  1998. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
  1999. MOVQ (SI), R9
  2000. MOVQ R9, (AX)
  2001. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  2002. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16:
  2003. MOVQ (SI), R9
  2004. MOVQ -8(SI)(R8*1), SI
  2005. MOVQ R9, (AX)
  2006. MOVQ SI, -8(AX)(R8*1)
  2007. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  2008. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
  2009. MOVOU (SI), X0
  2010. MOVOU -16(SI)(R8*1), X1
  2011. MOVOU X0, (AX)
  2012. MOVOU X1, -16(AX)(R8*1)
  2013. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  2014. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
  2015. MOVOU (SI), X0
  2016. MOVOU 16(SI), X1
  2017. MOVOU -32(SI)(R8*1), X2
  2018. MOVOU -16(SI)(R8*1), X3
  2019. MOVOU X0, (AX)
  2020. MOVOU X1, 16(AX)
  2021. MOVOU X2, -32(AX)(R8*1)
  2022. MOVOU X3, -16(AX)(R8*1)
  2023. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  2024. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128:
  2025. MOVOU (SI), X0
  2026. MOVOU 16(SI), X1
  2027. MOVOU 32(SI), X2
  2028. MOVOU 48(SI), X3
  2029. MOVOU -64(SI)(R8*1), X12
  2030. MOVOU -48(SI)(R8*1), X13
  2031. MOVOU -32(SI)(R8*1), X14
  2032. MOVOU -16(SI)(R8*1), X15
  2033. MOVOU X0, (AX)
  2034. MOVOU X1, 16(AX)
  2035. MOVOU X2, 32(AX)
  2036. MOVOU X3, 48(AX)
  2037. MOVOU X12, -64(AX)(R8*1)
  2038. MOVOU X13, -48(AX)(R8*1)
  2039. MOVOU X14, -32(AX)(R8*1)
  2040. MOVOU X15, -16(AX)(R8*1)
  2041. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  2042. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256:
  2043. MOVOU (SI), X0
  2044. MOVOU 16(SI), X1
  2045. MOVOU 32(SI), X2
  2046. MOVOU 48(SI), X3
  2047. MOVOU 64(SI), X4
  2048. MOVOU 80(SI), X5
  2049. MOVOU 96(SI), X6
  2050. MOVOU 112(SI), X7
  2051. MOVOU -128(SI)(R8*1), X8
  2052. MOVOU -112(SI)(R8*1), X9
  2053. MOVOU -96(SI)(R8*1), X10
  2054. MOVOU -80(SI)(R8*1), X11
  2055. MOVOU -64(SI)(R8*1), X12
  2056. MOVOU -48(SI)(R8*1), X13
  2057. MOVOU -32(SI)(R8*1), X14
  2058. MOVOU -16(SI)(R8*1), X15
  2059. MOVOU X0, (AX)
  2060. MOVOU X1, 16(AX)
  2061. MOVOU X2, 32(AX)
  2062. MOVOU X3, 48(AX)
  2063. MOVOU X4, 64(AX)
  2064. MOVOU X5, 80(AX)
  2065. MOVOU X6, 96(AX)
  2066. MOVOU X7, 112(AX)
  2067. MOVOU X8, -128(AX)(R8*1)
  2068. MOVOU X9, -112(AX)(R8*1)
  2069. MOVOU X10, -96(AX)(R8*1)
  2070. MOVOU X11, -80(AX)(R8*1)
  2071. MOVOU X12, -64(AX)(R8*1)
  2072. MOVOU X13, -48(AX)(R8*1)
  2073. MOVOU X14, -32(AX)(R8*1)
  2074. MOVOU X15, -16(AX)(R8*1)
  2075. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  2076. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048:
  2077. LEAQ -256(R8), R8
  2078. MOVOU (SI), X0
  2079. MOVOU 16(SI), X1
  2080. MOVOU 32(SI), X2
  2081. MOVOU 48(SI), X3
  2082. MOVOU 64(SI), X4
  2083. MOVOU 80(SI), X5
  2084. MOVOU 96(SI), X6
  2085. MOVOU 112(SI), X7
  2086. MOVOU 128(SI), X8
  2087. MOVOU 144(SI), X9
  2088. MOVOU 160(SI), X10
  2089. MOVOU 176(SI), X11
  2090. MOVOU 192(SI), X12
  2091. MOVOU 208(SI), X13
  2092. MOVOU 224(SI), X14
  2093. MOVOU 240(SI), X15
  2094. MOVOU X0, (AX)
  2095. MOVOU X1, 16(AX)
  2096. MOVOU X2, 32(AX)
  2097. MOVOU X3, 48(AX)
  2098. MOVOU X4, 64(AX)
  2099. MOVOU X5, 80(AX)
  2100. MOVOU X6, 96(AX)
  2101. MOVOU X7, 112(AX)
  2102. MOVOU X8, 128(AX)
  2103. MOVOU X9, 144(AX)
  2104. MOVOU X10, 160(AX)
  2105. MOVOU X11, 176(AX)
  2106. MOVOU X12, 192(AX)
  2107. MOVOU X13, 208(AX)
  2108. MOVOU X14, 224(AX)
  2109. MOVOU X15, 240(AX)
  2110. CMPQ R8, $0x00000100
  2111. LEAQ 256(SI), SI
  2112. LEAQ 256(AX), AX
  2113. JGE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048
  2114. JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail
  2115. memmove_end_copy_match_emit_encodeBlockAsm12B:
  2116. MOVQ DI, AX
  2117. emit_literal_done_match_emit_encodeBlockAsm12B:
  2118. match_nolit_loop_encodeBlockAsm12B:
  2119. MOVL CX, SI
  2120. SUBL BP, SI
  2121. MOVL SI, 16(SP)
  2122. ADDL $0x04, CX
  2123. ADDL $0x04, BP
  2124. MOVQ src_len+32(FP), SI
  2125. SUBL CX, SI
  2126. LEAQ (DX)(CX*1), DI
  2127. LEAQ (DX)(BP*1), BP
  2128. XORL R9, R9
  2129. CMPL SI, $0x08
  2130. JL matchlen_single_match_nolit_encodeBlockAsm12B
  2131. matchlen_loopback_match_nolit_encodeBlockAsm12B:
  2132. MOVQ (DI)(R9*1), R8
  2133. XORQ (BP)(R9*1), R8
  2134. TESTQ R8, R8
  2135. JZ matchlen_loop_match_nolit_encodeBlockAsm12B
  2136. BSFQ R8, R8
  2137. SARQ $0x03, R8
  2138. LEAL (R9)(R8*1), R9
  2139. JMP match_nolit_end_encodeBlockAsm12B
  2140. matchlen_loop_match_nolit_encodeBlockAsm12B:
  2141. LEAL -8(SI), SI
  2142. LEAL 8(R9), R9
  2143. CMPL SI, $0x08
  2144. JGE matchlen_loopback_match_nolit_encodeBlockAsm12B
  2145. matchlen_single_match_nolit_encodeBlockAsm12B:
  2146. TESTL SI, SI
  2147. JZ match_nolit_end_encodeBlockAsm12B
  2148. matchlen_single_loopback_match_nolit_encodeBlockAsm12B:
  2149. MOVB (DI)(R9*1), R8
  2150. CMPB (BP)(R9*1), R8
  2151. JNE match_nolit_end_encodeBlockAsm12B
  2152. LEAL 1(R9), R9
  2153. DECL SI
  2154. JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B
  2155. match_nolit_end_encodeBlockAsm12B:
  2156. ADDL R9, CX
  2157. MOVL 16(SP), BP
  2158. ADDL $0x04, R9
  2159. CMPL BP, $0x00010000
  2160. JL two_byte_offset_match_nolit_encodeBlockAsm12B
  2161. four_bytes_loop_back_match_nolit_encodeBlockAsm12B:
  2162. CMPL R9, $0x40
  2163. JLE four_bytes_remain_match_nolit_encodeBlockAsm12B
  2164. MOVB $0xff, (AX)
  2165. MOVL BP, 1(AX)
  2166. LEAL -64(R9), R9
  2167. ADDQ $0x05, AX
  2168. CMPL R9, $0x04
  2169. JL four_bytes_remain_match_nolit_encodeBlockAsm12B
  2170. emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy:
  2171. MOVL R9, SI
  2172. LEAL -4(R9), R9
  2173. CMPL SI, $0x08
  2174. JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy
  2175. CMPL SI, $0x0c
  2176. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy
  2177. CMPL BP, $0x00000800
  2178. JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy
  2179. cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
  2180. CMPL R9, $0x00000104
  2181. JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy
  2182. CMPL R9, $0x00010100
  2183. JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy
  2184. CMPL R9, $0x0100ffff
  2185. JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy
  2186. LEAL -16842747(R9), R9
  2187. MOVW $0x001d, (AX)
  2188. MOVW $0xfffb, 2(AX)
  2189. MOVB $0xff, 4(AX)
  2190. ADDQ $0x05, AX
  2191. JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy
  2192. repeat_five_match_nolit_encodeBlockAsm12B_emit_copy:
  2193. LEAL -65536(R9), R9
  2194. MOVL R9, BP
  2195. MOVW $0x001d, (AX)
  2196. MOVW R9, 2(AX)
  2197. SARL $0x10, BP
  2198. MOVB BP, 4(AX)
  2199. ADDQ $0x05, AX
  2200. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2201. repeat_four_match_nolit_encodeBlockAsm12B_emit_copy:
  2202. LEAL -256(R9), R9
  2203. MOVW $0x0019, (AX)
  2204. MOVW R9, 2(AX)
  2205. ADDQ $0x04, AX
  2206. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2207. repeat_three_match_nolit_encodeBlockAsm12B_emit_copy:
  2208. LEAL -4(R9), R9
  2209. MOVW $0x0015, (AX)
  2210. MOVB R9, 2(AX)
  2211. ADDQ $0x03, AX
  2212. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2213. repeat_two_match_nolit_encodeBlockAsm12B_emit_copy:
  2214. SHLL $0x02, R9
  2215. ORL $0x01, R9
  2216. MOVW R9, (AX)
  2217. ADDQ $0x02, AX
  2218. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2219. repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
  2220. XORQ SI, SI
  2221. LEAL 1(SI)(R9*4), R9
  2222. MOVB BP, 1(AX)
  2223. SARL $0x08, BP
  2224. SHLL $0x05, BP
  2225. ORL BP, R9
  2226. MOVB R9, (AX)
  2227. ADDQ $0x02, AX
  2228. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2229. JMP four_bytes_loop_back_match_nolit_encodeBlockAsm12B
  2230. four_bytes_remain_match_nolit_encodeBlockAsm12B:
  2231. TESTL R9, R9
  2232. JZ match_nolit_emitcopy_end_encodeBlockAsm12B
  2233. MOVB $0x03, BL
  2234. LEAL -4(BX)(R9*4), R9
  2235. MOVB R9, (AX)
  2236. MOVL BP, 1(AX)
  2237. ADDQ $0x05, AX
  2238. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2239. two_byte_offset_match_nolit_encodeBlockAsm12B:
  2240. CMPL R9, $0x40
  2241. JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B
  2242. MOVB $0xee, (AX)
  2243. MOVW BP, 1(AX)
  2244. LEAL -60(R9), R9
  2245. ADDQ $0x03, AX
  2246. emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short:
  2247. MOVL R9, SI
  2248. LEAL -4(R9), R9
  2249. CMPL SI, $0x08
  2250. JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
  2251. CMPL SI, $0x0c
  2252. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
  2253. CMPL BP, $0x00000800
  2254. JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
  2255. cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
  2256. CMPL R9, $0x00000104
  2257. JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
  2258. CMPL R9, $0x00010100
  2259. JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short
  2260. CMPL R9, $0x0100ffff
  2261. JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short
  2262. LEAL -16842747(R9), R9
  2263. MOVW $0x001d, (AX)
  2264. MOVW $0xfffb, 2(AX)
  2265. MOVB $0xff, 4(AX)
  2266. ADDQ $0x05, AX
  2267. JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short
  2268. repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short:
  2269. LEAL -65536(R9), R9
  2270. MOVL R9, BP
  2271. MOVW $0x001d, (AX)
  2272. MOVW R9, 2(AX)
  2273. SARL $0x10, BP
  2274. MOVB BP, 4(AX)
  2275. ADDQ $0x05, AX
  2276. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2277. repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short:
  2278. LEAL -256(R9), R9
  2279. MOVW $0x0019, (AX)
  2280. MOVW R9, 2(AX)
  2281. ADDQ $0x04, AX
  2282. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2283. repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
  2284. LEAL -4(R9), R9
  2285. MOVW $0x0015, (AX)
  2286. MOVB R9, 2(AX)
  2287. ADDQ $0x03, AX
  2288. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2289. repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
  2290. SHLL $0x02, R9
  2291. ORL $0x01, R9
  2292. MOVW R9, (AX)
  2293. ADDQ $0x02, AX
  2294. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2295. repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
  2296. XORQ SI, SI
  2297. LEAL 1(SI)(R9*4), R9
  2298. MOVB BP, 1(AX)
  2299. SARL $0x08, BP
  2300. SHLL $0x05, BP
  2301. ORL BP, R9
  2302. MOVB R9, (AX)
  2303. ADDQ $0x02, AX
  2304. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2305. JMP two_byte_offset_match_nolit_encodeBlockAsm12B
  2306. two_byte_offset_short_match_nolit_encodeBlockAsm12B:
  2307. CMPL R9, $0x0c
  2308. JGE emit_copy_three_match_nolit_encodeBlockAsm12B
  2309. CMPL BP, $0x00000800
  2310. JGE emit_copy_three_match_nolit_encodeBlockAsm12B
  2311. MOVB $0x01, BL
  2312. LEAL -16(BX)(R9*4), R9
  2313. MOVB BP, 1(AX)
  2314. SHRL $0x08, BP
  2315. SHLL $0x05, BP
  2316. ORL BP, R9
  2317. MOVB R9, (AX)
  2318. ADDQ $0x02, AX
  2319. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  2320. emit_copy_three_match_nolit_encodeBlockAsm12B:
  2321. MOVB $0x02, BL
  2322. LEAL -4(BX)(R9*4), R9
  2323. MOVB R9, (AX)
  2324. MOVW BP, 1(AX)
  2325. ADDQ $0x03, AX
  2326. match_nolit_emitcopy_end_encodeBlockAsm12B:
  2327. MOVL CX, 12(SP)
  2328. CMPL CX, 8(SP)
  2329. JGE emit_remainder_encodeBlockAsm12B
  2330. CMPQ AX, (SP)
  2331. JL match_nolit_dst_ok_encodeBlockAsm12B
  2332. MOVQ $0x00000000, ret+48(FP)
  2333. RET
  2334. match_nolit_dst_ok_encodeBlockAsm12B:
  2335. MOVQ -2(DX)(CX*1), SI
  2336. MOVQ $0x000000cf1bbcdcbb, BP
  2337. MOVQ SI, DI
  2338. SHRQ $0x10, SI
  2339. MOVQ SI, R8
  2340. SHLQ $0x18, DI
  2341. IMULQ BP, DI
  2342. SHRQ $0x34, DI
  2343. SHLQ $0x18, R8
  2344. IMULQ BP, R8
  2345. SHRQ $0x34, R8
  2346. LEAL -2(CX), R9
  2347. MOVL 24(SP)(R8*4), BP
  2348. MOVL R9, 24(SP)(DI*4)
  2349. MOVL CX, 24(SP)(R8*4)
  2350. CMPL (DX)(BP*1), SI
  2351. JEQ match_nolit_loop_encodeBlockAsm12B
  2352. INCL CX
  2353. JMP search_loop_encodeBlockAsm12B
  2354. emit_remainder_encodeBlockAsm12B:
  2355. MOVQ src_len+32(FP), CX
  2356. SUBL 12(SP), CX
  2357. LEAQ 4(AX)(CX*1), CX
  2358. CMPQ CX, (SP)
  2359. JL emit_remainder_ok_encodeBlockAsm12B
  2360. MOVQ $0x00000000, ret+48(FP)
  2361. RET
  2362. emit_remainder_ok_encodeBlockAsm12B:
  2363. MOVQ src_len+32(FP), CX
  2364. MOVL 12(SP), BX
  2365. CMPL BX, CX
  2366. JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B
  2367. MOVL CX, BP
  2368. MOVL CX, 12(SP)
  2369. LEAQ (DX)(BX*1), CX
  2370. SUBL BX, BP
  2371. MOVL BP, DX
  2372. SUBL $0x01, DX
  2373. JC emit_literal_done_emit_remainder_encodeBlockAsm12B
  2374. CMPL DX, $0x3c
  2375. JLT one_byte_emit_remainder_encodeBlockAsm12B
  2376. CMPL DX, $0x00000100
  2377. JLT two_bytes_emit_remainder_encodeBlockAsm12B
  2378. CMPL DX, $0x00010000
  2379. JLT three_bytes_emit_remainder_encodeBlockAsm12B
  2380. CMPL DX, $0x01000000
  2381. JLT four_bytes_emit_remainder_encodeBlockAsm12B
  2382. MOVB $0xfc, (AX)
  2383. MOVL DX, 1(AX)
  2384. ADDQ $0x05, AX
  2385. JMP memmove_emit_remainder_encodeBlockAsm12B
  2386. four_bytes_emit_remainder_encodeBlockAsm12B:
  2387. MOVL DX, BX
  2388. SHRL $0x10, BX
  2389. MOVB $0xf8, (AX)
  2390. MOVW DX, 1(AX)
  2391. MOVB BL, 3(AX)
  2392. ADDQ $0x04, AX
  2393. JMP memmove_emit_remainder_encodeBlockAsm12B
  2394. three_bytes_emit_remainder_encodeBlockAsm12B:
  2395. MOVB $0xf4, (AX)
  2396. MOVW DX, 1(AX)
  2397. ADDQ $0x03, AX
  2398. JMP memmove_emit_remainder_encodeBlockAsm12B
  2399. two_bytes_emit_remainder_encodeBlockAsm12B:
  2400. MOVB $0xf0, (AX)
  2401. MOVB DL, 1(AX)
  2402. ADDQ $0x02, AX
  2403. JMP memmove_emit_remainder_encodeBlockAsm12B
  2404. one_byte_emit_remainder_encodeBlockAsm12B:
  2405. SHLB $0x02, DL
  2406. MOVB DL, (AX)
  2407. ADDQ $0x01, AX
  2408. memmove_emit_remainder_encodeBlockAsm12B:
  2409. LEAQ (AX)(BP*1), DX
  2410. MOVL BP, BX
  2411. NOP
  2412. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail:
  2413. TESTQ BX, BX
  2414. JEQ memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2415. CMPQ BX, $0x02
  2416. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
  2417. CMPQ BX, $0x04
  2418. JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
  2419. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4
  2420. CMPQ BX, $0x08
  2421. JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7
  2422. JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8
  2423. CMPQ BX, $0x10
  2424. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16
  2425. CMPQ BX, $0x20
  2426. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
  2427. CMPQ BX, $0x40
  2428. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
  2429. CMPQ BX, $0x80
  2430. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128
  2431. CMPQ BX, $0x00000100
  2432. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256
  2433. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048
  2434. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
  2435. MOVB (CX), BP
  2436. MOVB -1(CX)(BX*1), CL
  2437. MOVB BP, (AX)
  2438. MOVB CL, -1(AX)(BX*1)
  2439. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2440. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4:
  2441. MOVL (CX), BP
  2442. MOVL BP, (AX)
  2443. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2444. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
  2445. MOVW (CX), BP
  2446. MOVB 2(CX), CL
  2447. MOVW BP, (AX)
  2448. MOVB CL, 2(AX)
  2449. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2450. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7:
  2451. MOVL (CX), BP
  2452. MOVL -4(CX)(BX*1), CX
  2453. MOVL BP, (AX)
  2454. MOVL CX, -4(AX)(BX*1)
  2455. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2456. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8:
  2457. MOVQ (CX), BP
  2458. MOVQ BP, (AX)
  2459. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2460. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16:
  2461. MOVQ (CX), BP
  2462. MOVQ -8(CX)(BX*1), CX
  2463. MOVQ BP, (AX)
  2464. MOVQ CX, -8(AX)(BX*1)
  2465. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2466. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
  2467. MOVOU (CX), X0
  2468. MOVOU -16(CX)(BX*1), X1
  2469. MOVOU X0, (AX)
  2470. MOVOU X1, -16(AX)(BX*1)
  2471. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2472. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
  2473. MOVOU (CX), X0
  2474. MOVOU 16(CX), X1
  2475. MOVOU -32(CX)(BX*1), X2
  2476. MOVOU -16(CX)(BX*1), X3
  2477. MOVOU X0, (AX)
  2478. MOVOU X1, 16(AX)
  2479. MOVOU X2, -32(AX)(BX*1)
  2480. MOVOU X3, -16(AX)(BX*1)
  2481. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2482. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128:
  2483. MOVOU (CX), X0
  2484. MOVOU 16(CX), X1
  2485. MOVOU 32(CX), X2
  2486. MOVOU 48(CX), X3
  2487. MOVOU -64(CX)(BX*1), X12
  2488. MOVOU -48(CX)(BX*1), X13
  2489. MOVOU -32(CX)(BX*1), X14
  2490. MOVOU -16(CX)(BX*1), X15
  2491. MOVOU X0, (AX)
  2492. MOVOU X1, 16(AX)
  2493. MOVOU X2, 32(AX)
  2494. MOVOU X3, 48(AX)
  2495. MOVOU X12, -64(AX)(BX*1)
  2496. MOVOU X13, -48(AX)(BX*1)
  2497. MOVOU X14, -32(AX)(BX*1)
  2498. MOVOU X15, -16(AX)(BX*1)
  2499. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2500. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256:
  2501. MOVOU (CX), X0
  2502. MOVOU 16(CX), X1
  2503. MOVOU 32(CX), X2
  2504. MOVOU 48(CX), X3
  2505. MOVOU 64(CX), X4
  2506. MOVOU 80(CX), X5
  2507. MOVOU 96(CX), X6
  2508. MOVOU 112(CX), X7
  2509. MOVOU -128(CX)(BX*1), X8
  2510. MOVOU -112(CX)(BX*1), X9
  2511. MOVOU -96(CX)(BX*1), X10
  2512. MOVOU -80(CX)(BX*1), X11
  2513. MOVOU -64(CX)(BX*1), X12
  2514. MOVOU -48(CX)(BX*1), X13
  2515. MOVOU -32(CX)(BX*1), X14
  2516. MOVOU -16(CX)(BX*1), X15
  2517. MOVOU X0, (AX)
  2518. MOVOU X1, 16(AX)
  2519. MOVOU X2, 32(AX)
  2520. MOVOU X3, 48(AX)
  2521. MOVOU X4, 64(AX)
  2522. MOVOU X5, 80(AX)
  2523. MOVOU X6, 96(AX)
  2524. MOVOU X7, 112(AX)
  2525. MOVOU X8, -128(AX)(BX*1)
  2526. MOVOU X9, -112(AX)(BX*1)
  2527. MOVOU X10, -96(AX)(BX*1)
  2528. MOVOU X11, -80(AX)(BX*1)
  2529. MOVOU X12, -64(AX)(BX*1)
  2530. MOVOU X13, -48(AX)(BX*1)
  2531. MOVOU X14, -32(AX)(BX*1)
  2532. MOVOU X15, -16(AX)(BX*1)
  2533. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  2534. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048:
  2535. LEAQ -256(BX), BX
  2536. MOVOU (CX), X0
  2537. MOVOU 16(CX), X1
  2538. MOVOU 32(CX), X2
  2539. MOVOU 48(CX), X3
  2540. MOVOU 64(CX), X4
  2541. MOVOU 80(CX), X5
  2542. MOVOU 96(CX), X6
  2543. MOVOU 112(CX), X7
  2544. MOVOU 128(CX), X8
  2545. MOVOU 144(CX), X9
  2546. MOVOU 160(CX), X10
  2547. MOVOU 176(CX), X11
  2548. MOVOU 192(CX), X12
  2549. MOVOU 208(CX), X13
  2550. MOVOU 224(CX), X14
  2551. MOVOU 240(CX), X15
  2552. MOVOU X0, (AX)
  2553. MOVOU X1, 16(AX)
  2554. MOVOU X2, 32(AX)
  2555. MOVOU X3, 48(AX)
  2556. MOVOU X4, 64(AX)
  2557. MOVOU X5, 80(AX)
  2558. MOVOU X6, 96(AX)
  2559. MOVOU X7, 112(AX)
  2560. MOVOU X8, 128(AX)
  2561. MOVOU X9, 144(AX)
  2562. MOVOU X10, 160(AX)
  2563. MOVOU X11, 176(AX)
  2564. MOVOU X12, 192(AX)
  2565. MOVOU X13, 208(AX)
  2566. MOVOU X14, 224(AX)
  2567. MOVOU X15, 240(AX)
  2568. CMPQ BX, $0x00000100
  2569. LEAQ 256(CX), CX
  2570. LEAQ 256(AX), AX
  2571. JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048
  2572. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail
  2573. memmove_end_copy_emit_remainder_encodeBlockAsm12B:
  2574. MOVQ DX, AX
  2575. emit_literal_done_emit_remainder_encodeBlockAsm12B:
  2576. MOVQ dst_base+0(FP), CX
  2577. SUBQ CX, AX
  2578. MOVQ AX, ret+48(FP)
  2579. RET
  2580. // func encodeBlockAsm10B(dst []byte, src []byte) int
  2581. // Requires: SSE2
  2582. TEXT ·encodeBlockAsm10B(SB), $4120-56
  2583. MOVQ dst_base+0(FP), AX
  2584. MOVQ $0x00000020, CX
  2585. LEAQ 24(SP), DX
  2586. PXOR X0, X0
  2587. zero_loop_encodeBlockAsm10B:
  2588. MOVOU X0, (DX)
  2589. MOVOU X0, 16(DX)
  2590. MOVOU X0, 32(DX)
  2591. MOVOU X0, 48(DX)
  2592. MOVOU X0, 64(DX)
  2593. MOVOU X0, 80(DX)
  2594. MOVOU X0, 96(DX)
  2595. MOVOU X0, 112(DX)
  2596. ADDQ $0x80, DX
  2597. DECQ CX
  2598. JNZ zero_loop_encodeBlockAsm10B
  2599. MOVL $0x00000000, 12(SP)
  2600. MOVQ src_len+32(FP), CX
  2601. LEAQ -5(CX), DX
  2602. LEAQ -8(CX), BP
  2603. MOVL BP, 8(SP)
  2604. SHRQ $0x05, CX
  2605. SUBL CX, DX
  2606. LEAQ (AX)(DX*1), DX
  2607. MOVQ DX, (SP)
  2608. MOVL $0x00000001, CX
  2609. MOVL CX, 16(SP)
  2610. MOVQ src_base+24(FP), DX
  2611. search_loop_encodeBlockAsm10B:
  2612. MOVQ (DX)(CX*1), SI
  2613. MOVL CX, BP
  2614. SUBL 12(SP), BP
  2615. SHRL $0x05, BP
  2616. LEAL 4(CX)(BP*1), BP
  2617. MOVL 8(SP), DI
  2618. CMPL BP, DI
  2619. JGT emit_remainder_encodeBlockAsm10B
  2620. MOVL BP, 20(SP)
  2621. MOVQ $0x000000cf1bbcdcbb, R8
  2622. MOVQ SI, R9
  2623. MOVQ SI, R10
  2624. SHRQ $0x08, R10
  2625. SHLQ $0x18, R9
  2626. IMULQ R8, R9
  2627. SHRQ $0x36, R9
  2628. SHLQ $0x18, R10
  2629. IMULQ R8, R10
  2630. SHRQ $0x36, R10
  2631. MOVL 24(SP)(R9*4), BP
  2632. MOVL 24(SP)(R10*4), DI
  2633. MOVL CX, 24(SP)(R9*4)
  2634. LEAL 1(CX), R9
  2635. MOVL R9, 24(SP)(R10*4)
  2636. MOVQ SI, R9
  2637. SHRQ $0x10, R9
  2638. SHLQ $0x18, R9
  2639. IMULQ R8, R9
  2640. SHRQ $0x36, R9
  2641. MOVL CX, R8
  2642. SUBL 16(SP), R8
  2643. MOVL 1(DX)(R8*1), R10
  2644. MOVQ SI, R8
  2645. SHRQ $0x08, R8
  2646. CMPL R8, R10
  2647. JNE no_repeat_found_encodeBlockAsm10B
  2648. LEAL 1(CX), SI
  2649. MOVL 12(SP), DI
  2650. MOVL SI, BP
  2651. SUBL 16(SP), BP
  2652. JZ repeat_extend_back_end_encodeBlockAsm10B
  2653. repeat_extend_back_loop_encodeBlockAsm10B:
  2654. CMPL SI, DI
  2655. JLE repeat_extend_back_end_encodeBlockAsm10B
  2656. MOVB -1(DX)(BP*1), BL
  2657. MOVB -1(DX)(SI*1), R8
  2658. CMPB BL, R8
  2659. JNE repeat_extend_back_end_encodeBlockAsm10B
  2660. LEAL -1(SI), SI
  2661. DECL BP
  2662. JNZ repeat_extend_back_loop_encodeBlockAsm10B
  2663. repeat_extend_back_end_encodeBlockAsm10B:
  2664. MOVL 12(SP), BP
  2665. CMPL BP, SI
  2666. JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B
  2667. MOVL SI, R8
  2668. MOVL SI, 12(SP)
  2669. LEAQ (DX)(BP*1), R9
  2670. SUBL BP, R8
  2671. MOVL R8, BP
  2672. SUBL $0x01, BP
  2673. JC emit_literal_done_repeat_emit_encodeBlockAsm10B
  2674. CMPL BP, $0x3c
  2675. JLT one_byte_repeat_emit_encodeBlockAsm10B
  2676. CMPL BP, $0x00000100
  2677. JLT two_bytes_repeat_emit_encodeBlockAsm10B
  2678. CMPL BP, $0x00010000
  2679. JLT three_bytes_repeat_emit_encodeBlockAsm10B
  2680. CMPL BP, $0x01000000
  2681. JLT four_bytes_repeat_emit_encodeBlockAsm10B
  2682. MOVB $0xfc, (AX)
  2683. MOVL BP, 1(AX)
  2684. ADDQ $0x05, AX
  2685. JMP memmove_repeat_emit_encodeBlockAsm10B
  2686. four_bytes_repeat_emit_encodeBlockAsm10B:
  2687. MOVL BP, R10
  2688. SHRL $0x10, R10
  2689. MOVB $0xf8, (AX)
  2690. MOVW BP, 1(AX)
  2691. MOVB R10, 3(AX)
  2692. ADDQ $0x04, AX
  2693. JMP memmove_repeat_emit_encodeBlockAsm10B
  2694. three_bytes_repeat_emit_encodeBlockAsm10B:
  2695. MOVB $0xf4, (AX)
  2696. MOVW BP, 1(AX)
  2697. ADDQ $0x03, AX
  2698. JMP memmove_repeat_emit_encodeBlockAsm10B
  2699. two_bytes_repeat_emit_encodeBlockAsm10B:
  2700. MOVB $0xf0, (AX)
  2701. MOVB BP, 1(AX)
  2702. ADDQ $0x02, AX
  2703. JMP memmove_repeat_emit_encodeBlockAsm10B
  2704. one_byte_repeat_emit_encodeBlockAsm10B:
  2705. SHLB $0x02, BP
  2706. MOVB BP, (AX)
  2707. ADDQ $0x01, AX
  2708. memmove_repeat_emit_encodeBlockAsm10B:
  2709. LEAQ (AX)(R8*1), BP
  2710. NOP
  2711. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_tail:
  2712. TESTQ R8, R8
  2713. JEQ memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2714. CMPQ R8, $0x02
  2715. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2
  2716. CMPQ R8, $0x04
  2717. JB emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3
  2718. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4
  2719. CMPQ R8, $0x08
  2720. JB emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_5through7
  2721. JE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
  2722. CMPQ R8, $0x10
  2723. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_9through16
  2724. CMPQ R8, $0x20
  2725. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
  2726. CMPQ R8, $0x40
  2727. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
  2728. CMPQ R8, $0x80
  2729. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_65through128
  2730. CMPQ R8, $0x00000100
  2731. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_129through256
  2732. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_256through2048
  2733. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2:
  2734. MOVB (R9), R10
  2735. MOVB -1(R9)(R8*1), R9
  2736. MOVB R10, (AX)
  2737. MOVB R9, -1(AX)(R8*1)
  2738. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2739. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4:
  2740. MOVL (R9), R10
  2741. MOVL R10, (AX)
  2742. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2743. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3:
  2744. MOVW (R9), R10
  2745. MOVB 2(R9), R9
  2746. MOVW R10, (AX)
  2747. MOVB R9, 2(AX)
  2748. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2749. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_5through7:
  2750. MOVL (R9), R10
  2751. MOVL -4(R9)(R8*1), R9
  2752. MOVL R10, (AX)
  2753. MOVL R9, -4(AX)(R8*1)
  2754. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2755. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
  2756. MOVQ (R9), R10
  2757. MOVQ R10, (AX)
  2758. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2759. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_9through16:
  2760. MOVQ (R9), R10
  2761. MOVQ -8(R9)(R8*1), R9
  2762. MOVQ R10, (AX)
  2763. MOVQ R9, -8(AX)(R8*1)
  2764. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2765. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
  2766. MOVOU (R9), X0
  2767. MOVOU -16(R9)(R8*1), X1
  2768. MOVOU X0, (AX)
  2769. MOVOU X1, -16(AX)(R8*1)
  2770. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2771. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
  2772. MOVOU (R9), X0
  2773. MOVOU 16(R9), X1
  2774. MOVOU -32(R9)(R8*1), X2
  2775. MOVOU -16(R9)(R8*1), X3
  2776. MOVOU X0, (AX)
  2777. MOVOU X1, 16(AX)
  2778. MOVOU X2, -32(AX)(R8*1)
  2779. MOVOU X3, -16(AX)(R8*1)
  2780. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2781. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_65through128:
  2782. MOVOU (R9), X0
  2783. MOVOU 16(R9), X1
  2784. MOVOU 32(R9), X2
  2785. MOVOU 48(R9), X3
  2786. MOVOU -64(R9)(R8*1), X12
  2787. MOVOU -48(R9)(R8*1), X13
  2788. MOVOU -32(R9)(R8*1), X14
  2789. MOVOU -16(R9)(R8*1), X15
  2790. MOVOU X0, (AX)
  2791. MOVOU X1, 16(AX)
  2792. MOVOU X2, 32(AX)
  2793. MOVOU X3, 48(AX)
  2794. MOVOU X12, -64(AX)(R8*1)
  2795. MOVOU X13, -48(AX)(R8*1)
  2796. MOVOU X14, -32(AX)(R8*1)
  2797. MOVOU X15, -16(AX)(R8*1)
  2798. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2799. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_129through256:
  2800. MOVOU (R9), X0
  2801. MOVOU 16(R9), X1
  2802. MOVOU 32(R9), X2
  2803. MOVOU 48(R9), X3
  2804. MOVOU 64(R9), X4
  2805. MOVOU 80(R9), X5
  2806. MOVOU 96(R9), X6
  2807. MOVOU 112(R9), X7
  2808. MOVOU -128(R9)(R8*1), X8
  2809. MOVOU -112(R9)(R8*1), X9
  2810. MOVOU -96(R9)(R8*1), X10
  2811. MOVOU -80(R9)(R8*1), X11
  2812. MOVOU -64(R9)(R8*1), X12
  2813. MOVOU -48(R9)(R8*1), X13
  2814. MOVOU -32(R9)(R8*1), X14
  2815. MOVOU -16(R9)(R8*1), X15
  2816. MOVOU X0, (AX)
  2817. MOVOU X1, 16(AX)
  2818. MOVOU X2, 32(AX)
  2819. MOVOU X3, 48(AX)
  2820. MOVOU X4, 64(AX)
  2821. MOVOU X5, 80(AX)
  2822. MOVOU X6, 96(AX)
  2823. MOVOU X7, 112(AX)
  2824. MOVOU X8, -128(AX)(R8*1)
  2825. MOVOU X9, -112(AX)(R8*1)
  2826. MOVOU X10, -96(AX)(R8*1)
  2827. MOVOU X11, -80(AX)(R8*1)
  2828. MOVOU X12, -64(AX)(R8*1)
  2829. MOVOU X13, -48(AX)(R8*1)
  2830. MOVOU X14, -32(AX)(R8*1)
  2831. MOVOU X15, -16(AX)(R8*1)
  2832. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  2833. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_256through2048:
  2834. LEAQ -256(R8), R8
  2835. MOVOU (R9), X0
  2836. MOVOU 16(R9), X1
  2837. MOVOU 32(R9), X2
  2838. MOVOU 48(R9), X3
  2839. MOVOU 64(R9), X4
  2840. MOVOU 80(R9), X5
  2841. MOVOU 96(R9), X6
  2842. MOVOU 112(R9), X7
  2843. MOVOU 128(R9), X8
  2844. MOVOU 144(R9), X9
  2845. MOVOU 160(R9), X10
  2846. MOVOU 176(R9), X11
  2847. MOVOU 192(R9), X12
  2848. MOVOU 208(R9), X13
  2849. MOVOU 224(R9), X14
  2850. MOVOU 240(R9), X15
  2851. MOVOU X0, (AX)
  2852. MOVOU X1, 16(AX)
  2853. MOVOU X2, 32(AX)
  2854. MOVOU X3, 48(AX)
  2855. MOVOU X4, 64(AX)
  2856. MOVOU X5, 80(AX)
  2857. MOVOU X6, 96(AX)
  2858. MOVOU X7, 112(AX)
  2859. MOVOU X8, 128(AX)
  2860. MOVOU X9, 144(AX)
  2861. MOVOU X10, 160(AX)
  2862. MOVOU X11, 176(AX)
  2863. MOVOU X12, 192(AX)
  2864. MOVOU X13, 208(AX)
  2865. MOVOU X14, 224(AX)
  2866. MOVOU X15, 240(AX)
  2867. CMPQ R8, $0x00000100
  2868. LEAQ 256(R9), R9
  2869. LEAQ 256(AX), AX
  2870. JGE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_256through2048
  2871. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_tail
  2872. memmove_end_copy_repeat_emit_encodeBlockAsm10B:
  2873. MOVQ BP, AX
  2874. emit_literal_done_repeat_emit_encodeBlockAsm10B:
  2875. ADDL $0x05, CX
  2876. MOVL CX, BP
  2877. SUBL 16(SP), BP
  2878. MOVQ src_len+32(FP), R8
  2879. SUBL CX, R8
  2880. LEAQ (DX)(CX*1), R9
  2881. LEAQ (DX)(BP*1), BP
  2882. XORL R11, R11
  2883. CMPL R8, $0x08
  2884. JL matchlen_single_repeat_extend
  2885. matchlen_loopback_repeat_extend:
  2886. MOVQ (R9)(R11*1), R10
  2887. XORQ (BP)(R11*1), R10
  2888. TESTQ R10, R10
  2889. JZ matchlen_loop_repeat_extend
  2890. BSFQ R10, R10
  2891. SARQ $0x03, R10
  2892. LEAL (R11)(R10*1), R11
  2893. JMP repeat_extend_forward_end_encodeBlockAsm10B
  2894. matchlen_loop_repeat_extend:
  2895. LEAL -8(R8), R8
  2896. LEAL 8(R11), R11
  2897. CMPL R8, $0x08
  2898. JGE matchlen_loopback_repeat_extend
  2899. matchlen_single_repeat_extend:
  2900. TESTL R8, R8
  2901. JZ repeat_extend_forward_end_encodeBlockAsm10B
  2902. matchlen_single_loopback_repeat_extend:
  2903. MOVB (R9)(R11*1), R10
  2904. CMPB (BP)(R11*1), R10
  2905. JNE repeat_extend_forward_end_encodeBlockAsm10B
  2906. LEAL 1(R11), R11
  2907. DECL R8
  2908. JNZ matchlen_single_loopback_repeat_extend
  2909. repeat_extend_forward_end_encodeBlockAsm10B:
  2910. ADDL R11, CX
  2911. MOVL CX, BP
  2912. SUBL SI, BP
  2913. MOVL 16(SP), SI
  2914. TESTL DI, DI
  2915. JZ repeat_as_copy_encodeBlockAsm10B
  2916. emit_repeat_again_match_repeat_encodeBlockAsm10B:
  2917. MOVL BP, DI
  2918. LEAL -4(BP), BP
  2919. CMPL DI, $0x08
  2920. JLE repeat_two_match_repeat_encodeBlockAsm10B
  2921. CMPL DI, $0x0c
  2922. JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
  2923. CMPL SI, $0x00000800
  2924. JLT repeat_two_offset_match_repeat_encodeBlockAsm10B
  2925. cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
  2926. CMPL BP, $0x00000104
  2927. JLT repeat_three_match_repeat_encodeBlockAsm10B
  2928. CMPL BP, $0x00010100
  2929. JLT repeat_four_match_repeat_encodeBlockAsm10B
  2930. CMPL BP, $0x0100ffff
  2931. JLT repeat_five_match_repeat_encodeBlockAsm10B
  2932. LEAL -16842747(BP), BP
  2933. MOVW $0x001d, (AX)
  2934. MOVW $0xfffb, 2(AX)
  2935. MOVB $0xff, 4(AX)
  2936. ADDQ $0x05, AX
  2937. JMP emit_repeat_again_match_repeat_encodeBlockAsm10B
  2938. repeat_five_match_repeat_encodeBlockAsm10B:
  2939. LEAL -65536(BP), BP
  2940. MOVL BP, SI
  2941. MOVW $0x001d, (AX)
  2942. MOVW BP, 2(AX)
  2943. SARL $0x10, SI
  2944. MOVB SI, 4(AX)
  2945. ADDQ $0x05, AX
  2946. JMP repeat_end_emit_encodeBlockAsm10B
  2947. repeat_four_match_repeat_encodeBlockAsm10B:
  2948. LEAL -256(BP), BP
  2949. MOVW $0x0019, (AX)
  2950. MOVW BP, 2(AX)
  2951. ADDQ $0x04, AX
  2952. JMP repeat_end_emit_encodeBlockAsm10B
  2953. repeat_three_match_repeat_encodeBlockAsm10B:
  2954. LEAL -4(BP), BP
  2955. MOVW $0x0015, (AX)
  2956. MOVB BP, 2(AX)
  2957. ADDQ $0x03, AX
  2958. JMP repeat_end_emit_encodeBlockAsm10B
  2959. repeat_two_match_repeat_encodeBlockAsm10B:
  2960. SHLL $0x02, BP
  2961. ORL $0x01, BP
  2962. MOVW BP, (AX)
  2963. ADDQ $0x02, AX
  2964. JMP repeat_end_emit_encodeBlockAsm10B
  2965. repeat_two_offset_match_repeat_encodeBlockAsm10B:
  2966. XORQ DI, DI
  2967. LEAL 1(DI)(BP*4), BP
  2968. MOVB SI, 1(AX)
  2969. SARL $0x08, SI
  2970. SHLL $0x05, SI
  2971. ORL SI, BP
  2972. MOVB BP, (AX)
  2973. ADDQ $0x02, AX
  2974. JMP repeat_end_emit_encodeBlockAsm10B
  2975. repeat_as_copy_encodeBlockAsm10B:
  2976. CMPL SI, $0x00010000
  2977. JL two_byte_offset_repeat_as_copy_encodeBlockAsm10B
  2978. four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10B:
  2979. CMPL BP, $0x40
  2980. JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm10B
  2981. MOVB $0xff, (AX)
  2982. MOVL SI, 1(AX)
  2983. LEAL -64(BP), BP
  2984. ADDQ $0x05, AX
  2985. CMPL BP, $0x04
  2986. JL four_bytes_remain_repeat_as_copy_encodeBlockAsm10B
  2987. emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy:
  2988. MOVL BP, DI
  2989. LEAL -4(BP), BP
  2990. CMPL DI, $0x08
  2991. JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy
  2992. CMPL DI, $0x0c
  2993. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy
  2994. CMPL SI, $0x00000800
  2995. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy
  2996. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy:
  2997. CMPL BP, $0x00000104
  2998. JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy
  2999. CMPL BP, $0x00010100
  3000. JLT repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy
  3001. CMPL BP, $0x0100ffff
  3002. JLT repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy
  3003. LEAL -16842747(BP), BP
  3004. MOVW $0x001d, (AX)
  3005. MOVW $0xfffb, 2(AX)
  3006. MOVB $0xff, 4(AX)
  3007. ADDQ $0x05, AX
  3008. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy
  3009. repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy:
  3010. LEAL -65536(BP), BP
  3011. MOVL BP, SI
  3012. MOVW $0x001d, (AX)
  3013. MOVW BP, 2(AX)
  3014. SARL $0x10, SI
  3015. MOVB SI, 4(AX)
  3016. ADDQ $0x05, AX
  3017. JMP repeat_end_emit_encodeBlockAsm10B
  3018. repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy:
  3019. LEAL -256(BP), BP
  3020. MOVW $0x0019, (AX)
  3021. MOVW BP, 2(AX)
  3022. ADDQ $0x04, AX
  3023. JMP repeat_end_emit_encodeBlockAsm10B
  3024. repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy:
  3025. LEAL -4(BP), BP
  3026. MOVW $0x0015, (AX)
  3027. MOVB BP, 2(AX)
  3028. ADDQ $0x03, AX
  3029. JMP repeat_end_emit_encodeBlockAsm10B
  3030. repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy:
  3031. SHLL $0x02, BP
  3032. ORL $0x01, BP
  3033. MOVW BP, (AX)
  3034. ADDQ $0x02, AX
  3035. JMP repeat_end_emit_encodeBlockAsm10B
  3036. repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy:
  3037. XORQ DI, DI
  3038. LEAL 1(DI)(BP*4), BP
  3039. MOVB SI, 1(AX)
  3040. SARL $0x08, SI
  3041. SHLL $0x05, SI
  3042. ORL SI, BP
  3043. MOVB BP, (AX)
  3044. ADDQ $0x02, AX
  3045. JMP repeat_end_emit_encodeBlockAsm10B
  3046. JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10B
  3047. four_bytes_remain_repeat_as_copy_encodeBlockAsm10B:
  3048. TESTL BP, BP
  3049. JZ repeat_end_emit_encodeBlockAsm10B
  3050. MOVB $0x03, BL
  3051. LEAL -4(BX)(BP*4), BP
  3052. MOVB BP, (AX)
  3053. MOVL SI, 1(AX)
  3054. ADDQ $0x05, AX
  3055. JMP repeat_end_emit_encodeBlockAsm10B
  3056. two_byte_offset_repeat_as_copy_encodeBlockAsm10B:
  3057. CMPL BP, $0x40
  3058. JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
  3059. MOVB $0xee, (AX)
  3060. MOVW SI, 1(AX)
  3061. LEAL -60(BP), BP
  3062. ADDQ $0x03, AX
  3063. emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3064. MOVL BP, DI
  3065. LEAL -4(BP), BP
  3066. CMPL DI, $0x08
  3067. JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3068. CMPL DI, $0x0c
  3069. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3070. CMPL SI, $0x00000800
  3071. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3072. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3073. CMPL BP, $0x00000104
  3074. JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3075. CMPL BP, $0x00010100
  3076. JLT repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3077. CMPL BP, $0x0100ffff
  3078. JLT repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3079. LEAL -16842747(BP), BP
  3080. MOVW $0x001d, (AX)
  3081. MOVW $0xfffb, 2(AX)
  3082. MOVB $0xff, 4(AX)
  3083. ADDQ $0x05, AX
  3084. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3085. repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3086. LEAL -65536(BP), BP
  3087. MOVL BP, SI
  3088. MOVW $0x001d, (AX)
  3089. MOVW BP, 2(AX)
  3090. SARL $0x10, SI
  3091. MOVB SI, 4(AX)
  3092. ADDQ $0x05, AX
  3093. JMP repeat_end_emit_encodeBlockAsm10B
  3094. repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3095. LEAL -256(BP), BP
  3096. MOVW $0x0019, (AX)
  3097. MOVW BP, 2(AX)
  3098. ADDQ $0x04, AX
  3099. JMP repeat_end_emit_encodeBlockAsm10B
  3100. repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3101. LEAL -4(BP), BP
  3102. MOVW $0x0015, (AX)
  3103. MOVB BP, 2(AX)
  3104. ADDQ $0x03, AX
  3105. JMP repeat_end_emit_encodeBlockAsm10B
  3106. repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3107. SHLL $0x02, BP
  3108. ORL $0x01, BP
  3109. MOVW BP, (AX)
  3110. ADDQ $0x02, AX
  3111. JMP repeat_end_emit_encodeBlockAsm10B
  3112. repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3113. XORQ DI, DI
  3114. LEAL 1(DI)(BP*4), BP
  3115. MOVB SI, 1(AX)
  3116. SARL $0x08, SI
  3117. SHLL $0x05, SI
  3118. ORL SI, BP
  3119. MOVB BP, (AX)
  3120. ADDQ $0x02, AX
  3121. JMP repeat_end_emit_encodeBlockAsm10B
  3122. JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B
  3123. two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
  3124. CMPL BP, $0x0c
  3125. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
  3126. CMPL SI, $0x00000800
  3127. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
  3128. MOVB $0x01, BL
  3129. LEAL -16(BX)(BP*4), BP
  3130. MOVB SI, 1(AX)
  3131. SHRL $0x08, SI
  3132. SHLL $0x05, SI
  3133. ORL SI, BP
  3134. MOVB BP, (AX)
  3135. ADDQ $0x02, AX
  3136. JMP repeat_end_emit_encodeBlockAsm10B
  3137. emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
  3138. MOVB $0x02, BL
  3139. LEAL -4(BX)(BP*4), BP
  3140. MOVB BP, (AX)
  3141. MOVW SI, 1(AX)
  3142. ADDQ $0x03, AX
  3143. repeat_end_emit_encodeBlockAsm10B:
  3144. MOVL CX, 12(SP)
  3145. CMPL CX, 8(SP)
  3146. JGE emit_remainder_encodeBlockAsm10B
  3147. JMP search_loop_encodeBlockAsm10B
  3148. no_repeat_found_encodeBlockAsm10B:
  3149. CMPL (DX)(BP*1), SI
  3150. JEQ candidate_match_encodeBlockAsm10B
  3151. SHRQ $0x08, SI
  3152. MOVL 24(SP)(R9*4), BP
  3153. LEAL 2(CX), R8
  3154. CMPL (DX)(DI*1), SI
  3155. JEQ candidate2_match_encodeBlockAsm10B
  3156. MOVL R8, 24(SP)(R9*4)
  3157. SHRQ $0x08, SI
  3158. CMPL (DX)(BP*1), SI
  3159. JEQ candidate3_match_encodeBlockAsm10B
  3160. MOVL 20(SP), CX
  3161. JMP search_loop_encodeBlockAsm10B
  3162. candidate3_match_encodeBlockAsm10B:
  3163. ADDL $0x02, CX
  3164. JMP candidate_match_encodeBlockAsm10B
  3165. candidate2_match_encodeBlockAsm10B:
  3166. MOVL R8, 24(SP)(R9*4)
  3167. INCL CX
  3168. MOVL DI, BP
  3169. candidate_match_encodeBlockAsm10B:
  3170. MOVL 12(SP), SI
  3171. TESTL BP, BP
  3172. JZ match_extend_back_end_encodeBlockAsm10B
  3173. match_extend_back_loop_encodeBlockAsm10B:
  3174. CMPL CX, SI
  3175. JLE match_extend_back_end_encodeBlockAsm10B
  3176. MOVB -1(DX)(BP*1), BL
  3177. MOVB -1(DX)(CX*1), DI
  3178. CMPB BL, DI
  3179. JNE match_extend_back_end_encodeBlockAsm10B
  3180. LEAL -1(CX), CX
  3181. DECL BP
  3182. JZ match_extend_back_end_encodeBlockAsm10B
  3183. JMP match_extend_back_loop_encodeBlockAsm10B
  3184. match_extend_back_end_encodeBlockAsm10B:
  3185. MOVL CX, SI
  3186. SUBL 12(SP), SI
  3187. LEAQ 4(AX)(SI*1), SI
  3188. CMPQ SI, (SP)
  3189. JL match_dst_size_check_encodeBlockAsm10B
  3190. MOVQ $0x00000000, ret+48(FP)
  3191. RET
  3192. match_dst_size_check_encodeBlockAsm10B:
  3193. MOVL CX, SI
  3194. MOVL 12(SP), DI
  3195. CMPL DI, SI
  3196. JEQ emit_literal_done_match_emit_encodeBlockAsm10B
  3197. MOVL SI, R8
  3198. MOVL SI, 12(SP)
  3199. LEAQ (DX)(DI*1), SI
  3200. SUBL DI, R8
  3201. MOVL R8, DI
  3202. SUBL $0x01, DI
  3203. JC emit_literal_done_match_emit_encodeBlockAsm10B
  3204. CMPL DI, $0x3c
  3205. JLT one_byte_match_emit_encodeBlockAsm10B
  3206. CMPL DI, $0x00000100
  3207. JLT two_bytes_match_emit_encodeBlockAsm10B
  3208. CMPL DI, $0x00010000
  3209. JLT three_bytes_match_emit_encodeBlockAsm10B
  3210. CMPL DI, $0x01000000
  3211. JLT four_bytes_match_emit_encodeBlockAsm10B
  3212. MOVB $0xfc, (AX)
  3213. MOVL DI, 1(AX)
  3214. ADDQ $0x05, AX
  3215. JMP memmove_match_emit_encodeBlockAsm10B
  3216. four_bytes_match_emit_encodeBlockAsm10B:
  3217. MOVL DI, R9
  3218. SHRL $0x10, R9
  3219. MOVB $0xf8, (AX)
  3220. MOVW DI, 1(AX)
  3221. MOVB R9, 3(AX)
  3222. ADDQ $0x04, AX
  3223. JMP memmove_match_emit_encodeBlockAsm10B
  3224. three_bytes_match_emit_encodeBlockAsm10B:
  3225. MOVB $0xf4, (AX)
  3226. MOVW DI, 1(AX)
  3227. ADDQ $0x03, AX
  3228. JMP memmove_match_emit_encodeBlockAsm10B
  3229. two_bytes_match_emit_encodeBlockAsm10B:
  3230. MOVB $0xf0, (AX)
  3231. MOVB DI, 1(AX)
  3232. ADDQ $0x02, AX
  3233. JMP memmove_match_emit_encodeBlockAsm10B
  3234. one_byte_match_emit_encodeBlockAsm10B:
  3235. SHLB $0x02, DI
  3236. MOVB DI, (AX)
  3237. ADDQ $0x01, AX
  3238. memmove_match_emit_encodeBlockAsm10B:
  3239. LEAQ (AX)(R8*1), DI
  3240. NOP
  3241. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_tail:
  3242. TESTQ R8, R8
  3243. JEQ memmove_end_copy_match_emit_encodeBlockAsm10B
  3244. CMPQ R8, $0x02
  3245. JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2
  3246. CMPQ R8, $0x04
  3247. JB emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3
  3248. JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4
  3249. CMPQ R8, $0x08
  3250. JB emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_5through7
  3251. JE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
  3252. CMPQ R8, $0x10
  3253. JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_9through16
  3254. CMPQ R8, $0x20
  3255. JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
  3256. CMPQ R8, $0x40
  3257. JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
  3258. CMPQ R8, $0x80
  3259. JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_65through128
  3260. CMPQ R8, $0x00000100
  3261. JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_129through256
  3262. JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_256through2048
  3263. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2:
  3264. MOVB (SI), R9
  3265. MOVB -1(SI)(R8*1), SI
  3266. MOVB R9, (AX)
  3267. MOVB SI, -1(AX)(R8*1)
  3268. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3269. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4:
  3270. MOVL (SI), R9
  3271. MOVL R9, (AX)
  3272. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3273. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3:
  3274. MOVW (SI), R9
  3275. MOVB 2(SI), SI
  3276. MOVW R9, (AX)
  3277. MOVB SI, 2(AX)
  3278. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3279. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_5through7:
  3280. MOVL (SI), R9
  3281. MOVL -4(SI)(R8*1), SI
  3282. MOVL R9, (AX)
  3283. MOVL SI, -4(AX)(R8*1)
  3284. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3285. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
  3286. MOVQ (SI), R9
  3287. MOVQ R9, (AX)
  3288. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3289. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_9through16:
  3290. MOVQ (SI), R9
  3291. MOVQ -8(SI)(R8*1), SI
  3292. MOVQ R9, (AX)
  3293. MOVQ SI, -8(AX)(R8*1)
  3294. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3295. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
  3296. MOVOU (SI), X0
  3297. MOVOU -16(SI)(R8*1), X1
  3298. MOVOU X0, (AX)
  3299. MOVOU X1, -16(AX)(R8*1)
  3300. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3301. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
  3302. MOVOU (SI), X0
  3303. MOVOU 16(SI), X1
  3304. MOVOU -32(SI)(R8*1), X2
  3305. MOVOU -16(SI)(R8*1), X3
  3306. MOVOU X0, (AX)
  3307. MOVOU X1, 16(AX)
  3308. MOVOU X2, -32(AX)(R8*1)
  3309. MOVOU X3, -16(AX)(R8*1)
  3310. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3311. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_65through128:
  3312. MOVOU (SI), X0
  3313. MOVOU 16(SI), X1
  3314. MOVOU 32(SI), X2
  3315. MOVOU 48(SI), X3
  3316. MOVOU -64(SI)(R8*1), X12
  3317. MOVOU -48(SI)(R8*1), X13
  3318. MOVOU -32(SI)(R8*1), X14
  3319. MOVOU -16(SI)(R8*1), X15
  3320. MOVOU X0, (AX)
  3321. MOVOU X1, 16(AX)
  3322. MOVOU X2, 32(AX)
  3323. MOVOU X3, 48(AX)
  3324. MOVOU X12, -64(AX)(R8*1)
  3325. MOVOU X13, -48(AX)(R8*1)
  3326. MOVOU X14, -32(AX)(R8*1)
  3327. MOVOU X15, -16(AX)(R8*1)
  3328. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3329. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_129through256:
  3330. MOVOU (SI), X0
  3331. MOVOU 16(SI), X1
  3332. MOVOU 32(SI), X2
  3333. MOVOU 48(SI), X3
  3334. MOVOU 64(SI), X4
  3335. MOVOU 80(SI), X5
  3336. MOVOU 96(SI), X6
  3337. MOVOU 112(SI), X7
  3338. MOVOU -128(SI)(R8*1), X8
  3339. MOVOU -112(SI)(R8*1), X9
  3340. MOVOU -96(SI)(R8*1), X10
  3341. MOVOU -80(SI)(R8*1), X11
  3342. MOVOU -64(SI)(R8*1), X12
  3343. MOVOU -48(SI)(R8*1), X13
  3344. MOVOU -32(SI)(R8*1), X14
  3345. MOVOU -16(SI)(R8*1), X15
  3346. MOVOU X0, (AX)
  3347. MOVOU X1, 16(AX)
  3348. MOVOU X2, 32(AX)
  3349. MOVOU X3, 48(AX)
  3350. MOVOU X4, 64(AX)
  3351. MOVOU X5, 80(AX)
  3352. MOVOU X6, 96(AX)
  3353. MOVOU X7, 112(AX)
  3354. MOVOU X8, -128(AX)(R8*1)
  3355. MOVOU X9, -112(AX)(R8*1)
  3356. MOVOU X10, -96(AX)(R8*1)
  3357. MOVOU X11, -80(AX)(R8*1)
  3358. MOVOU X12, -64(AX)(R8*1)
  3359. MOVOU X13, -48(AX)(R8*1)
  3360. MOVOU X14, -32(AX)(R8*1)
  3361. MOVOU X15, -16(AX)(R8*1)
  3362. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3363. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_256through2048:
  3364. LEAQ -256(R8), R8
  3365. MOVOU (SI), X0
  3366. MOVOU 16(SI), X1
  3367. MOVOU 32(SI), X2
  3368. MOVOU 48(SI), X3
  3369. MOVOU 64(SI), X4
  3370. MOVOU 80(SI), X5
  3371. MOVOU 96(SI), X6
  3372. MOVOU 112(SI), X7
  3373. MOVOU 128(SI), X8
  3374. MOVOU 144(SI), X9
  3375. MOVOU 160(SI), X10
  3376. MOVOU 176(SI), X11
  3377. MOVOU 192(SI), X12
  3378. MOVOU 208(SI), X13
  3379. MOVOU 224(SI), X14
  3380. MOVOU 240(SI), X15
  3381. MOVOU X0, (AX)
  3382. MOVOU X1, 16(AX)
  3383. MOVOU X2, 32(AX)
  3384. MOVOU X3, 48(AX)
  3385. MOVOU X4, 64(AX)
  3386. MOVOU X5, 80(AX)
  3387. MOVOU X6, 96(AX)
  3388. MOVOU X7, 112(AX)
  3389. MOVOU X8, 128(AX)
  3390. MOVOU X9, 144(AX)
  3391. MOVOU X10, 160(AX)
  3392. MOVOU X11, 176(AX)
  3393. MOVOU X12, 192(AX)
  3394. MOVOU X13, 208(AX)
  3395. MOVOU X14, 224(AX)
  3396. MOVOU X15, 240(AX)
  3397. CMPQ R8, $0x00000100
  3398. LEAQ 256(SI), SI
  3399. LEAQ 256(AX), AX
  3400. JGE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_256through2048
  3401. JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_tail
  3402. memmove_end_copy_match_emit_encodeBlockAsm10B:
  3403. MOVQ DI, AX
  3404. emit_literal_done_match_emit_encodeBlockAsm10B:
  3405. match_nolit_loop_encodeBlockAsm10B:
  3406. MOVL CX, SI
  3407. SUBL BP, SI
  3408. MOVL SI, 16(SP)
  3409. ADDL $0x04, CX
  3410. ADDL $0x04, BP
  3411. MOVQ src_len+32(FP), SI
  3412. SUBL CX, SI
  3413. LEAQ (DX)(CX*1), DI
  3414. LEAQ (DX)(BP*1), BP
  3415. XORL R9, R9
  3416. CMPL SI, $0x08
  3417. JL matchlen_single_match_nolit_encodeBlockAsm10B
  3418. matchlen_loopback_match_nolit_encodeBlockAsm10B:
  3419. MOVQ (DI)(R9*1), R8
  3420. XORQ (BP)(R9*1), R8
  3421. TESTQ R8, R8
  3422. JZ matchlen_loop_match_nolit_encodeBlockAsm10B
  3423. BSFQ R8, R8
  3424. SARQ $0x03, R8
  3425. LEAL (R9)(R8*1), R9
  3426. JMP match_nolit_end_encodeBlockAsm10B
  3427. matchlen_loop_match_nolit_encodeBlockAsm10B:
  3428. LEAL -8(SI), SI
  3429. LEAL 8(R9), R9
  3430. CMPL SI, $0x08
  3431. JGE matchlen_loopback_match_nolit_encodeBlockAsm10B
  3432. matchlen_single_match_nolit_encodeBlockAsm10B:
  3433. TESTL SI, SI
  3434. JZ match_nolit_end_encodeBlockAsm10B
  3435. matchlen_single_loopback_match_nolit_encodeBlockAsm10B:
  3436. MOVB (DI)(R9*1), R8
  3437. CMPB (BP)(R9*1), R8
  3438. JNE match_nolit_end_encodeBlockAsm10B
  3439. LEAL 1(R9), R9
  3440. DECL SI
  3441. JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm10B
  3442. match_nolit_end_encodeBlockAsm10B:
  3443. ADDL R9, CX
  3444. MOVL 16(SP), BP
  3445. ADDL $0x04, R9
  3446. CMPL BP, $0x00010000
  3447. JL two_byte_offset_match_nolit_encodeBlockAsm10B
  3448. four_bytes_loop_back_match_nolit_encodeBlockAsm10B:
  3449. CMPL R9, $0x40
  3450. JLE four_bytes_remain_match_nolit_encodeBlockAsm10B
  3451. MOVB $0xff, (AX)
  3452. MOVL BP, 1(AX)
  3453. LEAL -64(R9), R9
  3454. ADDQ $0x05, AX
  3455. CMPL R9, $0x04
  3456. JL four_bytes_remain_match_nolit_encodeBlockAsm10B
  3457. emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy:
  3458. MOVL R9, SI
  3459. LEAL -4(R9), R9
  3460. CMPL SI, $0x08
  3461. JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy
  3462. CMPL SI, $0x0c
  3463. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy
  3464. CMPL BP, $0x00000800
  3465. JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy
  3466. cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy:
  3467. CMPL R9, $0x00000104
  3468. JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy
  3469. CMPL R9, $0x00010100
  3470. JLT repeat_four_match_nolit_encodeBlockAsm10B_emit_copy
  3471. CMPL R9, $0x0100ffff
  3472. JLT repeat_five_match_nolit_encodeBlockAsm10B_emit_copy
  3473. LEAL -16842747(R9), R9
  3474. MOVW $0x001d, (AX)
  3475. MOVW $0xfffb, 2(AX)
  3476. MOVB $0xff, 4(AX)
  3477. ADDQ $0x05, AX
  3478. JMP emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy
  3479. repeat_five_match_nolit_encodeBlockAsm10B_emit_copy:
  3480. LEAL -65536(R9), R9
  3481. MOVL R9, BP
  3482. MOVW $0x001d, (AX)
  3483. MOVW R9, 2(AX)
  3484. SARL $0x10, BP
  3485. MOVB BP, 4(AX)
  3486. ADDQ $0x05, AX
  3487. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3488. repeat_four_match_nolit_encodeBlockAsm10B_emit_copy:
  3489. LEAL -256(R9), R9
  3490. MOVW $0x0019, (AX)
  3491. MOVW R9, 2(AX)
  3492. ADDQ $0x04, AX
  3493. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3494. repeat_three_match_nolit_encodeBlockAsm10B_emit_copy:
  3495. LEAL -4(R9), R9
  3496. MOVW $0x0015, (AX)
  3497. MOVB R9, 2(AX)
  3498. ADDQ $0x03, AX
  3499. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3500. repeat_two_match_nolit_encodeBlockAsm10B_emit_copy:
  3501. SHLL $0x02, R9
  3502. ORL $0x01, R9
  3503. MOVW R9, (AX)
  3504. ADDQ $0x02, AX
  3505. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3506. repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy:
  3507. XORQ SI, SI
  3508. LEAL 1(SI)(R9*4), R9
  3509. MOVB BP, 1(AX)
  3510. SARL $0x08, BP
  3511. SHLL $0x05, BP
  3512. ORL BP, R9
  3513. MOVB R9, (AX)
  3514. ADDQ $0x02, AX
  3515. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3516. JMP four_bytes_loop_back_match_nolit_encodeBlockAsm10B
  3517. four_bytes_remain_match_nolit_encodeBlockAsm10B:
  3518. TESTL R9, R9
  3519. JZ match_nolit_emitcopy_end_encodeBlockAsm10B
  3520. MOVB $0x03, BL
  3521. LEAL -4(BX)(R9*4), R9
  3522. MOVB R9, (AX)
  3523. MOVL BP, 1(AX)
  3524. ADDQ $0x05, AX
  3525. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3526. two_byte_offset_match_nolit_encodeBlockAsm10B:
  3527. CMPL R9, $0x40
  3528. JLE two_byte_offset_short_match_nolit_encodeBlockAsm10B
  3529. MOVB $0xee, (AX)
  3530. MOVW BP, 1(AX)
  3531. LEAL -60(R9), R9
  3532. ADDQ $0x03, AX
  3533. emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy_short:
  3534. MOVL R9, SI
  3535. LEAL -4(R9), R9
  3536. CMPL SI, $0x08
  3537. JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
  3538. CMPL SI, $0x0c
  3539. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
  3540. CMPL BP, $0x00000800
  3541. JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
  3542. cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
  3543. CMPL R9, $0x00000104
  3544. JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
  3545. CMPL R9, $0x00010100
  3546. JLT repeat_four_match_nolit_encodeBlockAsm10B_emit_copy_short
  3547. CMPL R9, $0x0100ffff
  3548. JLT repeat_five_match_nolit_encodeBlockAsm10B_emit_copy_short
  3549. LEAL -16842747(R9), R9
  3550. MOVW $0x001d, (AX)
  3551. MOVW $0xfffb, 2(AX)
  3552. MOVB $0xff, 4(AX)
  3553. ADDQ $0x05, AX
  3554. JMP emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy_short
  3555. repeat_five_match_nolit_encodeBlockAsm10B_emit_copy_short:
  3556. LEAL -65536(R9), R9
  3557. MOVL R9, BP
  3558. MOVW $0x001d, (AX)
  3559. MOVW R9, 2(AX)
  3560. SARL $0x10, BP
  3561. MOVB BP, 4(AX)
  3562. ADDQ $0x05, AX
  3563. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3564. repeat_four_match_nolit_encodeBlockAsm10B_emit_copy_short:
  3565. LEAL -256(R9), R9
  3566. MOVW $0x0019, (AX)
  3567. MOVW R9, 2(AX)
  3568. ADDQ $0x04, AX
  3569. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3570. repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
  3571. LEAL -4(R9), R9
  3572. MOVW $0x0015, (AX)
  3573. MOVB R9, 2(AX)
  3574. ADDQ $0x03, AX
  3575. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3576. repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
  3577. SHLL $0x02, R9
  3578. ORL $0x01, R9
  3579. MOVW R9, (AX)
  3580. ADDQ $0x02, AX
  3581. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3582. repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
  3583. XORQ SI, SI
  3584. LEAL 1(SI)(R9*4), R9
  3585. MOVB BP, 1(AX)
  3586. SARL $0x08, BP
  3587. SHLL $0x05, BP
  3588. ORL BP, R9
  3589. MOVB R9, (AX)
  3590. ADDQ $0x02, AX
  3591. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  3592. JMP two_byte_offset_match_nolit_encodeBlockAsm10B
  // Chooses between a two-byte and a three-byte encoding of (R9, BP).
  // The three-byte form is used when R9 >= 12 or BP >= 0x800 (signed compares).
  // NOTE(review): R9/BP are set above the visible chunk; presumably length and
  // offset - confirm.
  3593. two_byte_offset_short_match_nolit_encodeBlockAsm10B:
  3594. CMPL R9, $0x0c
  3595. JGE emit_copy_three_match_nolit_encodeBlockAsm10B
  3596. CMPL BP, $0x00000800
  3597. JGE emit_copy_three_match_nolit_encodeBlockAsm10B
  // Two-byte form:
  //   byte 0 = low byte of (1 - 16 + R9*4) | ((BP >> 8) << 5)
  //   byte 1 = low byte of BP
  // Only BL is loaded; since just the low byte of R9 is stored, the upper
  // bits of BX cannot influence the output.
  3598. MOVB $0x01, BL
  3599. LEAL -16(BX)(R9*4), R9
  3600. MOVB BP, 1(AX)
  3601. SHRL $0x08, BP
  3602. SHLL $0x05, BP
  3603. ORL BP, R9
  3604. MOVB R9, (AX)
  3605. ADDQ $0x02, AX
  3606. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  // Three-byte form: byte 0 = low byte of (2 - 4 + R9*4), bytes 1-2 = low
  // 16 bits of BP. AX += 3, then execution falls through to the next label.
  3607. emit_copy_three_match_nolit_encodeBlockAsm10B:
  3608. MOVB $0x02, BL
  3609. LEAL -4(BX)(R9*4), R9
  3610. MOVB R9, (AX)
  3611. MOVW BP, 1(AX)
  3612. ADDQ $0x03, AX
  // Common exit of the copy encoders. Stores the current position CX in 12(SP).
  // If CX has reached the limit in 8(SP) (signed compare) the trailing bytes
  // are emitted; otherwise the output pointer AX must still be below the bound
  // stored at (SP), or the function returns 0.
  3613. match_nolit_emitcopy_end_encodeBlockAsm10B:
  3614. MOVL CX, 12(SP)
  3615. CMPL CX, 8(SP)
  3616. JGE emit_remainder_encodeBlockAsm10B
  3617. CMPQ AX, (SP)
  3618. JL match_nolit_dst_ok_encodeBlockAsm10B
  3619. MOVQ $0x00000000, ret+48(FP)
  3620. RET
  // After a copy: index positions CX-2 and CX in the table at 24(SP) and test
  // whether another match starts right at CX.
  // NOTE(review): DX is loaded above the visible chunk; presumably the source
  // base pointer - confirm.
  3621. match_nolit_dst_ok_encodeBlockAsm10B:
  // SI = 8 bytes starting at DX+CX-2; BP = hash multiplier.
  3622. MOVQ -2(DX)(CX*1), SI
  3623. MOVQ $0x000000cf1bbcdcbb, BP
  3624. MOVQ SI, DI
  // Drop the two leading bytes so SI holds the bytes starting at DX+CX.
  3625. SHRQ $0x10, SI
  3626. MOVQ SI, R8
  // Hash: keep the low 5 bytes (shift left 24), multiply, take the top 10 bits.
  // DI = hash of the bytes at CX-2, R8 = hash of the bytes at CX.
  3627. SHLQ $0x18, DI
  3628. IMULQ BP, DI
  3629. SHRQ $0x36, DI
  3630. SHLQ $0x18, R8
  3631. IMULQ BP, R8
  3632. SHRQ $0x36, R8
  3633. LEAL -2(CX), R9
  // BP = previous table entry for the hash at CX; then store CX-2 and CX.
  3634. MOVL 24(SP)(R8*4), BP
  3635. MOVL R9, 24(SP)(DI*4)
  3636. MOVL CX, 24(SP)(R8*4)
  // If the 4 bytes at the candidate equal the 4 bytes at CX, continue matching;
  // otherwise advance CX by one and return to the search loop.
  3637. CMPL (DX)(BP*1), SI
  3638. JEQ match_nolit_loop_encodeBlockAsm10B
  3639. INCL CX
  3640. JMP search_loop_encodeBlockAsm10B
  // Output-space check before emitting the tail: CX = AX + (src_len - 12(SP)) + 4.
  // If that is not below the bound stored at (SP) (signed compare), return 0.
  3641. emit_remainder_encodeBlockAsm10B:
  3642. MOVQ src_len+32(FP), CX
  3643. SUBL 12(SP), CX
  3644. LEAQ 4(AX)(CX*1), CX
  3645. CMPQ CX, (SP)
  3646. JL emit_remainder_ok_encodeBlockAsm10B
  3647. MOVQ $0x00000000, ret+48(FP)
  3648. RET
  // Writes the header for the bytes from position 12(SP) up to src_len, then
  // falls through to the copy routine that follows.
  // On exit: CX = DX + old 12(SP) (copy source), BP = byte count, AX points
  // past the header. DX is reused as scratch from here on.
  3649. emit_remainder_ok_encodeBlockAsm10B:
  3650. MOVQ src_len+32(FP), CX
  3651. MOVL 12(SP), BX
  // Nothing left to emit when 12(SP) already equals src_len.
  3652. CMPL BX, CX
  3653. JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B
  3654. MOVL CX, BP
  3655. MOVL CX, 12(SP)
  3656. LEAQ (DX)(BX*1), CX
  3657. SUBL BX, BP
  // DX = count - 1; the borrow (carry) is set only if the count was zero.
  3658. MOVL BP, DX
  3659. SUBL $0x01, DX
  3660. JC emit_literal_done_emit_remainder_encodeBlockAsm10B
  // Pick the header size from count-1 (signed compares):
  //   < 60      -> 1 byte
  //   < 2^8     -> 0xf0 + 1 byte
  //   < 2^16    -> 0xf4 + 2 bytes
  //   < 2^24    -> 0xf8 + 3 bytes
  //   otherwise -> 0xfc + 4 bytes
  3661. CMPL DX, $0x3c
  3662. JLT one_byte_emit_remainder_encodeBlockAsm10B
  3663. CMPL DX, $0x00000100
  3664. JLT two_bytes_emit_remainder_encodeBlockAsm10B
  3665. CMPL DX, $0x00010000
  3666. JLT three_bytes_emit_remainder_encodeBlockAsm10B
  3667. CMPL DX, $0x01000000
  3668. JLT four_bytes_emit_remainder_encodeBlockAsm10B
  3669. MOVB $0xfc, (AX)
  3670. MOVL DX, 1(AX)
  3671. ADDQ $0x05, AX
  3672. JMP memmove_emit_remainder_encodeBlockAsm10B
  3673. four_bytes_emit_remainder_encodeBlockAsm10B:
  // BX = bits 16..23 of count-1, stored as the fourth header byte.
  3674. MOVL DX, BX
  3675. SHRL $0x10, BX
  3676. MOVB $0xf8, (AX)
  3677. MOVW DX, 1(AX)
  3678. MOVB BL, 3(AX)
  3679. ADDQ $0x04, AX
  3680. JMP memmove_emit_remainder_encodeBlockAsm10B
  3681. three_bytes_emit_remainder_encodeBlockAsm10B:
  3682. MOVB $0xf4, (AX)
  3683. MOVW DX, 1(AX)
  3684. ADDQ $0x03, AX
  3685. JMP memmove_emit_remainder_encodeBlockAsm10B
  3686. two_bytes_emit_remainder_encodeBlockAsm10B:
  3687. MOVB $0xf0, (AX)
  3688. MOVB DL, 1(AX)
  3689. ADDQ $0x02, AX
  3690. JMP memmove_emit_remainder_encodeBlockAsm10B
  // Single header byte: (count-1) << 2.
  3691. one_byte_emit_remainder_encodeBlockAsm10B:
  3692. SHLB $0x02, DL
  3693. MOVB DL, (AX)
  3694. ADDQ $0x01, AX
  // Copies BP bytes from CX to AX. DX = AX + BP is saved as the output pointer
  // to use once the copy is finished; BX is the working byte count.
  3695. memmove_emit_remainder_encodeBlockAsm10B:
  3696. LEAQ (AX)(BP*1), DX
  3697. MOVL BP, BX
  3698. NOP
  // Size dispatch (unsigned compares): 0, 1-2, 3, 4, 5-7, 8, 9-16, 17-32,
  // 33-64, 65-128, 129-256, and more than 256 bytes.
  3699. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_tail:
  3700. TESTQ BX, BX
  3701. JEQ memmove_end_copy_emit_remainder_encodeBlockAsm10B
  3702. CMPQ BX, $0x02
  3703. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2
  3704. CMPQ BX, $0x04
  // Both branches use the flags of the single compare: below 4 is 3, else equal is 4.
  3705. JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3
  3706. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4
  3707. CMPQ BX, $0x08
  3708. JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_5through7
  3709. JE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8
  3710. CMPQ BX, $0x10
  3711. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_9through16
  3712. CMPQ BX, $0x20
  3713. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
  3714. CMPQ BX, $0x40
  3715. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
  3716. CMPQ BX, $0x80
  3717. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_65through128
  3718. CMPQ BX, $0x00000100
  3719. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_129through256
  3720. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_256through2048
  // Fixed-size copy cases (CX = source, AX = destination, BX = byte count).
  // Each case performs all of its loads before its stores. The ranged cases
  // read one group from the start and one from the end (addressed via BX), so
  // the two groups may overlap when BX is below the top of the range.
  3721. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
  3722. MOVB (CX), BP
  3723. MOVB -1(CX)(BX*1), CL
  3724. MOVB BP, (AX)
  3725. MOVB CL, -1(AX)(BX*1)
  3726. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  3727. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4:
  3728. MOVL (CX), BP
  3729. MOVL BP, (AX)
  3730. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  3731. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
  3732. MOVW (CX), BP
  3733. MOVB 2(CX), CL
  3734. MOVW BP, (AX)
  3735. MOVB CL, 2(AX)
  3736. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  3737. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_5through7:
  3738. MOVL (CX), BP
  3739. MOVL -4(CX)(BX*1), CX
  3740. MOVL BP, (AX)
  3741. MOVL CX, -4(AX)(BX*1)
  3742. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  3743. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8:
  3744. MOVQ (CX), BP
  3745. MOVQ BP, (AX)
  3746. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  3747. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_9through16:
  3748. MOVQ (CX), BP
  3749. MOVQ -8(CX)(BX*1), CX
  3750. MOVQ BP, (AX)
  3751. MOVQ CX, -8(AX)(BX*1)
  3752. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  // From here on the copies use 16-byte unaligned SSE moves.
  3753. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
  3754. MOVOU (CX), X0
  3755. MOVOU -16(CX)(BX*1), X1
  3756. MOVOU X0, (AX)
  3757. MOVOU X1, -16(AX)(BX*1)
  3758. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  3759. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
  3760. MOVOU (CX), X0
  3761. MOVOU 16(CX), X1
  3762. MOVOU -32(CX)(BX*1), X2
  3763. MOVOU -16(CX)(BX*1), X3
  3764. MOVOU X0, (AX)
  3765. MOVOU X1, 16(AX)
  3766. MOVOU X2, -32(AX)(BX*1)
  3767. MOVOU X3, -16(AX)(BX*1)
  3768. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  3769. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_65through128:
  3770. MOVOU (CX), X0
  3771. MOVOU 16(CX), X1
  3772. MOVOU 32(CX), X2
  3773. MOVOU 48(CX), X3
  3774. MOVOU -64(CX)(BX*1), X12
  3775. MOVOU -48(CX)(BX*1), X13
  3776. MOVOU -32(CX)(BX*1), X14
  3777. MOVOU -16(CX)(BX*1), X15
  3778. MOVOU X0, (AX)
  3779. MOVOU X1, 16(AX)
  3780. MOVOU X2, 32(AX)
  3781. MOVOU X3, 48(AX)
  3782. MOVOU X12, -64(AX)(BX*1)
  3783. MOVOU X13, -48(AX)(BX*1)
  3784. MOVOU X14, -32(AX)(BX*1)
  3785. MOVOU X15, -16(AX)(BX*1)
  3786. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  3787. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_129through256:
  3788. MOVOU (CX), X0
  3789. MOVOU 16(CX), X1
  3790. MOVOU 32(CX), X2
  3791. MOVOU 48(CX), X3
  3792. MOVOU 64(CX), X4
  3793. MOVOU 80(CX), X5
  3794. MOVOU 96(CX), X6
  3795. MOVOU 112(CX), X7
  3796. MOVOU -128(CX)(BX*1), X8
  3797. MOVOU -112(CX)(BX*1), X9
  3798. MOVOU -96(CX)(BX*1), X10
  3799. MOVOU -80(CX)(BX*1), X11
  3800. MOVOU -64(CX)(BX*1), X12
  3801. MOVOU -48(CX)(BX*1), X13
  3802. MOVOU -32(CX)(BX*1), X14
  3803. MOVOU -16(CX)(BX*1), X15
  3804. MOVOU X0, (AX)
  3805. MOVOU X1, 16(AX)
  3806. MOVOU X2, 32(AX)
  3807. MOVOU X3, 48(AX)
  3808. MOVOU X4, 64(AX)
  3809. MOVOU X5, 80(AX)
  3810. MOVOU X6, 96(AX)
  3811. MOVOU X7, 112(AX)
  3812. MOVOU X8, -128(AX)(BX*1)
  3813. MOVOU X9, -112(AX)(BX*1)
  3814. MOVOU X10, -96(AX)(BX*1)
  3815. MOVOU X11, -80(AX)(BX*1)
  3816. MOVOU X12, -64(AX)(BX*1)
  3817. MOVOU X13, -48(AX)(BX*1)
  3818. MOVOU X14, -32(AX)(BX*1)
  3819. MOVOU X15, -16(AX)(BX*1)
  3820. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  // Bulk loop for counts above 256: each pass subtracts 256 from BX and copies
  // 256 bytes from CX to AX through X0..X15.
  3821. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_256through2048:
  3822. LEAQ -256(BX), BX
  3823. MOVOU (CX), X0
  3824. MOVOU 16(CX), X1
  3825. MOVOU 32(CX), X2
  3826. MOVOU 48(CX), X3
  3827. MOVOU 64(CX), X4
  3828. MOVOU 80(CX), X5
  3829. MOVOU 96(CX), X6
  3830. MOVOU 112(CX), X7
  3831. MOVOU 128(CX), X8
  3832. MOVOU 144(CX), X9
  3833. MOVOU 160(CX), X10
  3834. MOVOU 176(CX), X11
  3835. MOVOU 192(CX), X12
  3836. MOVOU 208(CX), X13
  3837. MOVOU 224(CX), X14
  3838. MOVOU 240(CX), X15
  3839. MOVOU X0, (AX)
  3840. MOVOU X1, 16(AX)
  3841. MOVOU X2, 32(AX)
  3842. MOVOU X3, 48(AX)
  3843. MOVOU X4, 64(AX)
  3844. MOVOU X5, 80(AX)
  3845. MOVOU X6, 96(AX)
  3846. MOVOU X7, 112(AX)
  3847. MOVOU X8, 128(AX)
  3848. MOVOU X9, 144(AX)
  3849. MOVOU X10, 160(AX)
  3850. MOVOU X11, 176(AX)
  3851. MOVOU X12, 192(AX)
  3852. MOVOU X13, 208(AX)
  3853. MOVOU X14, 224(AX)
  3854. MOVOU X15, 240(AX)
  // The compare sets the flags; the two LEAQ pointer bumps leave them intact,
  // so JGE still tests "remaining count >= 256".
  3855. CMPQ BX, $0x00000100
  3856. LEAQ 256(CX), CX
  3857. LEAQ 256(AX), AX
  3858. JGE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_256through2048
  // Fewer than 256 bytes remain: hand them to the size dispatch.
  3859. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_tail
  // Copy finished: AX takes the end-of-copy pointer saved in DX when the copy
  // started.
  3860. memmove_end_copy_emit_remainder_encodeBlockAsm10B:
  3861. MOVQ DX, AX
  // Return value = AX - dst_base, i.e. the number of bytes written to dst.
  3862. emit_literal_done_emit_remainder_encodeBlockAsm10B:
  3863. MOVQ dst_base+0(FP), CX
  3864. SUBQ CX, AX
  3865. MOVQ AX, ret+48(FP)
  3866. RET
  3867. // func encodeBlockAsm8B(dst []byte, src []byte) int
  3868. // Requires: SSE2
  // Frame: 1048 bytes of locals, 56 bytes of arguments and results.
  // Stack slots as initialised below:
  //   (SP)       output bound = dst_base + src_len - 5 - (src_len >> 5)
  //   8(SP)      src_len - 8
  //   12(SP)     0
  //   16(SP)     1
  //   24(SP)...  1024-byte table, cleared to zero (indexed as 32-bit entries)
  // 20(SP) is written later, by the search loop.
  // Registers after setup: AX = dst_base (output pointer), CX = 1,
  // DX = src_base.
  3869. TEXT ·encodeBlockAsm8B(SB), $1048-56
  3870. MOVQ dst_base+0(FP), AX
  // 8 iterations x 128 bytes clears the 1024-byte table starting at 24(SP).
  3871. MOVQ $0x00000008, CX
  3872. LEAQ 24(SP), DX
  3873. PXOR X0, X0
  3874. zero_loop_encodeBlockAsm8B:
  3875. MOVOU X0, (DX)
  3876. MOVOU X0, 16(DX)
  3877. MOVOU X0, 32(DX)
  3878. MOVOU X0, 48(DX)
  3879. MOVOU X0, 64(DX)
  3880. MOVOU X0, 80(DX)
  3881. MOVOU X0, 96(DX)
  3882. MOVOU X0, 112(DX)
  3883. ADDQ $0x80, DX
  3884. DECQ CX
  3885. JNZ zero_loop_encodeBlockAsm8B
  3886. MOVL $0x00000000, 12(SP)
  3887. MOVQ src_len+32(FP), CX
  3888. LEAQ -5(CX), DX
  3889. LEAQ -8(CX), BP
  3890. MOVL BP, 8(SP)
  // DX = src_len - 5 - (src_len >> 5), then turned into an absolute pointer.
  3891. SHRQ $0x05, CX
  3892. SUBL CX, DX
  3893. LEAQ (AX)(DX*1), DX
  3894. MOVQ DX, (SP)
  3895. MOVL $0x00000001, CX
  3896. MOVL CX, 16(SP)
  3897. MOVQ src_base+24(FP), DX
  // Main search step at position CX (DX = src_base).
  3898. search_loop_encodeBlockAsm8B:
  // SI = 8 source bytes starting at CX.
  3899. MOVQ (DX)(CX*1), SI
  // BP = CX + 4 + ((CX - 12(SP)) >> 4): the position to resume from if nothing
  // matches here. If it exceeds the limit in 8(SP), emit the remainder.
  3900. MOVL CX, BP
  3901. SUBL 12(SP), BP
  3902. SHRL $0x04, BP
  3903. LEAL 4(CX)(BP*1), BP
  3904. MOVL 8(SP), DI
  3905. CMPL BP, DI
  3906. JGT emit_remainder_encodeBlockAsm8B
  3907. MOVL BP, 20(SP)
  // Hash: keep the low 4 bytes (shift left 32), multiply by 0x9e3779b1, take
  // the top 8 bits. R9 = hash of bytes at CX, R10 = hash of bytes at CX+1.
  3908. MOVQ $0x9e3779b1, R8
  3909. MOVQ SI, R9
  3910. MOVQ SI, R10
  3911. SHRQ $0x08, R10
  3912. SHLQ $0x20, R9
  3913. IMULQ R8, R9
  3914. SHRQ $0x38, R9
  3915. SHLQ $0x20, R10
  3916. IMULQ R8, R10
  3917. SHRQ $0x38, R10
  // BP, DI = previous table entries for the two hashes; then record CX and
  // CX+1 in their place.
  3918. MOVL 24(SP)(R9*4), BP
  3919. MOVL 24(SP)(R10*4), DI
  3920. MOVL CX, 24(SP)(R9*4)
  3921. LEAL 1(CX), R9
  3922. MOVL R9, 24(SP)(R10*4)
  // R9 = hash of the bytes at CX+2 (table lookup happens later, if needed).
  3923. MOVQ SI, R9
  3924. SHRQ $0x10, R9
  3925. SHLQ $0x20, R9
  3926. IMULQ R8, R9
  3927. SHRQ $0x38, R9
  // Repeat check: compare the 4 bytes at CX+1 with the 4 bytes at
  // CX+1-16(SP).
  3928. MOVL CX, R8
  3929. SUBL 16(SP), R8
  3930. MOVL 1(DX)(R8*1), R10
  3931. MOVQ SI, R8
  3932. SHRQ $0x08, R8
  3933. CMPL R8, R10
  3934. JNE no_repeat_found_encodeBlockAsm8B
  // Repeat found. SI = CX+1 (start of the repeat), DI = 12(SP),
  // BP = SI - 16(SP) (position being repeated). If BP is 0 there is nothing
  // before it to extend into.
  3935. LEAL 1(CX), SI
  3936. MOVL 12(SP), DI
  3937. MOVL SI, BP
  3938. SUBL 16(SP), BP
  3939. JZ repeat_extend_back_end_encodeBlockAsm8B
  // Extend the repeat backwards: while SI > DI and the byte before SI equals
  // the byte before BP, decrement both. Also stops when BP reaches 0.
  3940. repeat_extend_back_loop_encodeBlockAsm8B:
  3941. CMPL SI, DI
  3942. JLE repeat_extend_back_end_encodeBlockAsm8B
  3943. MOVB -1(DX)(BP*1), BL
  3944. MOVB -1(DX)(SI*1), R8
  3945. CMPB BL, R8
  3946. JNE repeat_extend_back_end_encodeBlockAsm8B
  3947. LEAL -1(SI), SI
  3948. DECL BP
  3949. JNZ repeat_extend_back_loop_encodeBlockAsm8B
  // Writes the header for the bytes between 12(SP) and SI (the repeat start),
  // then falls through to the copy routine that follows.
  // On exit: R9 = DX + old 12(SP) (copy source), R8 = byte count, 12(SP) = SI.
  3950. repeat_extend_back_end_encodeBlockAsm8B:
  3951. MOVL 12(SP), BP
  // Nothing to emit when the repeat starts exactly at 12(SP).
  3952. CMPL BP, SI
  3953. JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B
  3954. MOVL SI, R8
  3955. MOVL SI, 12(SP)
  3956. LEAQ (DX)(BP*1), R9
  3957. SUBL BP, R8
  // BP = count - 1; the borrow (carry) is set only if the count was zero.
  3958. MOVL R8, BP
  3959. SUBL $0x01, BP
  3960. JC emit_literal_done_repeat_emit_encodeBlockAsm8B
  // Header size by count-1 (signed compares): < 60 one byte; < 2^8 0xf0 + 1;
  // < 2^16 0xf4 + 2; < 2^24 0xf8 + 3; otherwise 0xfc + 4 bytes.
  3961. CMPL BP, $0x3c
  3962. JLT one_byte_repeat_emit_encodeBlockAsm8B
  3963. CMPL BP, $0x00000100
  3964. JLT two_bytes_repeat_emit_encodeBlockAsm8B
  3965. CMPL BP, $0x00010000
  3966. JLT three_bytes_repeat_emit_encodeBlockAsm8B
  3967. CMPL BP, $0x01000000
  3968. JLT four_bytes_repeat_emit_encodeBlockAsm8B
  3969. MOVB $0xfc, (AX)
  3970. MOVL BP, 1(AX)
  3971. ADDQ $0x05, AX
  3972. JMP memmove_repeat_emit_encodeBlockAsm8B
  3973. four_bytes_repeat_emit_encodeBlockAsm8B:
  // R10 = bits 16..23 of count-1, stored as the fourth header byte.
  3974. MOVL BP, R10
  3975. SHRL $0x10, R10
  3976. MOVB $0xf8, (AX)
  3977. MOVW BP, 1(AX)
  3978. MOVB R10, 3(AX)
  3979. ADDQ $0x04, AX
  3980. JMP memmove_repeat_emit_encodeBlockAsm8B
  3981. three_bytes_repeat_emit_encodeBlockAsm8B:
  3982. MOVB $0xf4, (AX)
  3983. MOVW BP, 1(AX)
  3984. ADDQ $0x03, AX
  3985. JMP memmove_repeat_emit_encodeBlockAsm8B
  3986. two_bytes_repeat_emit_encodeBlockAsm8B:
  3987. MOVB $0xf0, (AX)
  3988. MOVB BP, 1(AX)
  3989. ADDQ $0x02, AX
  3990. JMP memmove_repeat_emit_encodeBlockAsm8B
  // Single header byte: (count-1) << 2.
  3991. one_byte_repeat_emit_encodeBlockAsm8B:
  3992. SHLB $0x02, BP
  3993. MOVB BP, (AX)
  3994. ADDQ $0x01, AX
  // Copies R8 bytes from R9 to AX. BP = AX + R8 is saved as the output pointer
  // to use once the copy is finished.
  3995. memmove_repeat_emit_encodeBlockAsm8B:
  3996. LEAQ (AX)(R8*1), BP
  3997. NOP
  // Size dispatch (unsigned compares): 0, 1-2, 3, 4, 5-7, 8, 9-16, 17-32,
  // 33-64, 65-128, 129-256, and more than 256 bytes.
  3998. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_tail:
  3999. TESTQ R8, R8
  4000. JEQ memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4001. CMPQ R8, $0x02
  4002. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2
  4003. CMPQ R8, $0x04
  // Both branches use the flags of the single compare: below 4 is 3, else equal is 4.
  4004. JB emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3
  4005. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4
  4006. CMPQ R8, $0x08
  4007. JB emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_5through7
  4008. JE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
  4009. CMPQ R8, $0x10
  4010. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_9through16
  4011. CMPQ R8, $0x20
  4012. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
  4013. CMPQ R8, $0x40
  4014. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
  4015. CMPQ R8, $0x80
  4016. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_65through128
  4017. CMPQ R8, $0x00000100
  4018. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_129through256
  4019. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_256through2048
  // Fixed-size copy cases (R9 = source, AX = destination, R8 = byte count).
  // Each case performs all of its loads before its stores. The ranged cases
  // read one group from the start and one from the end (addressed via R8), so
  // the two groups may overlap when R8 is below the top of the range.
  4020. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2:
  4021. MOVB (R9), R10
  4022. MOVB -1(R9)(R8*1), R9
  4023. MOVB R10, (AX)
  4024. MOVB R9, -1(AX)(R8*1)
  4025. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4026. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4:
  4027. MOVL (R9), R10
  4028. MOVL R10, (AX)
  4029. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4030. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3:
  4031. MOVW (R9), R10
  4032. MOVB 2(R9), R9
  4033. MOVW R10, (AX)
  4034. MOVB R9, 2(AX)
  4035. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4036. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_5through7:
  4037. MOVL (R9), R10
  4038. MOVL -4(R9)(R8*1), R9
  4039. MOVL R10, (AX)
  4040. MOVL R9, -4(AX)(R8*1)
  4041. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4042. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
  4043. MOVQ (R9), R10
  4044. MOVQ R10, (AX)
  4045. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4046. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_9through16:
  4047. MOVQ (R9), R10
  4048. MOVQ -8(R9)(R8*1), R9
  4049. MOVQ R10, (AX)
  4050. MOVQ R9, -8(AX)(R8*1)
  4051. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  // From here on the copies use 16-byte unaligned SSE moves.
  4052. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
  4053. MOVOU (R9), X0
  4054. MOVOU -16(R9)(R8*1), X1
  4055. MOVOU X0, (AX)
  4056. MOVOU X1, -16(AX)(R8*1)
  4057. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4058. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
  4059. MOVOU (R9), X0
  4060. MOVOU 16(R9), X1
  4061. MOVOU -32(R9)(R8*1), X2
  4062. MOVOU -16(R9)(R8*1), X3
  4063. MOVOU X0, (AX)
  4064. MOVOU X1, 16(AX)
  4065. MOVOU X2, -32(AX)(R8*1)
  4066. MOVOU X3, -16(AX)(R8*1)
  4067. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4068. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_65through128:
  4069. MOVOU (R9), X0
  4070. MOVOU 16(R9), X1
  4071. MOVOU 32(R9), X2
  4072. MOVOU 48(R9), X3
  4073. MOVOU -64(R9)(R8*1), X12
  4074. MOVOU -48(R9)(R8*1), X13
  4075. MOVOU -32(R9)(R8*1), X14
  4076. MOVOU -16(R9)(R8*1), X15
  4077. MOVOU X0, (AX)
  4078. MOVOU X1, 16(AX)
  4079. MOVOU X2, 32(AX)
  4080. MOVOU X3, 48(AX)
  4081. MOVOU X12, -64(AX)(R8*1)
  4082. MOVOU X13, -48(AX)(R8*1)
  4083. MOVOU X14, -32(AX)(R8*1)
  4084. MOVOU X15, -16(AX)(R8*1)
  4085. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4086. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_129through256:
  4087. MOVOU (R9), X0
  4088. MOVOU 16(R9), X1
  4089. MOVOU 32(R9), X2
  4090. MOVOU 48(R9), X3
  4091. MOVOU 64(R9), X4
  4092. MOVOU 80(R9), X5
  4093. MOVOU 96(R9), X6
  4094. MOVOU 112(R9), X7
  4095. MOVOU -128(R9)(R8*1), X8
  4096. MOVOU -112(R9)(R8*1), X9
  4097. MOVOU -96(R9)(R8*1), X10
  4098. MOVOU -80(R9)(R8*1), X11
  4099. MOVOU -64(R9)(R8*1), X12
  4100. MOVOU -48(R9)(R8*1), X13
  4101. MOVOU -32(R9)(R8*1), X14
  4102. MOVOU -16(R9)(R8*1), X15
  4103. MOVOU X0, (AX)
  4104. MOVOU X1, 16(AX)
  4105. MOVOU X2, 32(AX)
  4106. MOVOU X3, 48(AX)
  4107. MOVOU X4, 64(AX)
  4108. MOVOU X5, 80(AX)
  4109. MOVOU X6, 96(AX)
  4110. MOVOU X7, 112(AX)
  4111. MOVOU X8, -128(AX)(R8*1)
  4112. MOVOU X9, -112(AX)(R8*1)
  4113. MOVOU X10, -96(AX)(R8*1)
  4114. MOVOU X11, -80(AX)(R8*1)
  4115. MOVOU X12, -64(AX)(R8*1)
  4116. MOVOU X13, -48(AX)(R8*1)
  4117. MOVOU X14, -32(AX)(R8*1)
  4118. MOVOU X15, -16(AX)(R8*1)
  4119. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  // Bulk loop for counts above 256: each pass subtracts 256 from R8 and copies
  // 256 bytes from R9 to AX through X0..X15.
  4120. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_256through2048:
  4121. LEAQ -256(R8), R8
  4122. MOVOU (R9), X0
  4123. MOVOU 16(R9), X1
  4124. MOVOU 32(R9), X2
  4125. MOVOU 48(R9), X3
  4126. MOVOU 64(R9), X4
  4127. MOVOU 80(R9), X5
  4128. MOVOU 96(R9), X6
  4129. MOVOU 112(R9), X7
  4130. MOVOU 128(R9), X8
  4131. MOVOU 144(R9), X9
  4132. MOVOU 160(R9), X10
  4133. MOVOU 176(R9), X11
  4134. MOVOU 192(R9), X12
  4135. MOVOU 208(R9), X13
  4136. MOVOU 224(R9), X14
  4137. MOVOU 240(R9), X15
  4138. MOVOU X0, (AX)
  4139. MOVOU X1, 16(AX)
  4140. MOVOU X2, 32(AX)
  4141. MOVOU X3, 48(AX)
  4142. MOVOU X4, 64(AX)
  4143. MOVOU X5, 80(AX)
  4144. MOVOU X6, 96(AX)
  4145. MOVOU X7, 112(AX)
  4146. MOVOU X8, 128(AX)
  4147. MOVOU X9, 144(AX)
  4148. MOVOU X10, 160(AX)
  4149. MOVOU X11, 176(AX)
  4150. MOVOU X12, 192(AX)
  4151. MOVOU X13, 208(AX)
  4152. MOVOU X14, 224(AX)
  4153. MOVOU X15, 240(AX)
  // The compare sets the flags; the two LEAQ pointer bumps leave them intact,
  // so JGE still tests "remaining count >= 256".
  4154. CMPQ R8, $0x00000100
  4155. LEAQ 256(R9), R9
  4156. LEAQ 256(AX), AX
  4157. JGE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_256through2048
  // Fewer than 256 bytes remain: hand them to the size dispatch.
  4158. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_tail
  // Copy finished: AX takes the end-of-copy pointer saved in BP when the copy
  // started.
  4159. memmove_end_copy_repeat_emit_encodeBlockAsm8B:
  4160. MOVQ BP, AX
  // Set up forward extension of the repeat. CX += 5 moves past position CX+1
  // plus the 4 bytes already compared by the repeat check.
  //   R8  = src_len - CX        bytes left in the source
  //   R9  = DX + CX             pointer to the current position
  //   BP  = DX + CX - 16(SP)    pointer to the data being repeated
  //   R11 = 0                   number of matching bytes found so far
  4161. emit_literal_done_repeat_emit_encodeBlockAsm8B:
  4162. ADDL $0x05, CX
  4163. MOVL CX, BP
  4164. SUBL 16(SP), BP
  4165. MOVQ src_len+32(FP), R8
  4166. SUBL CX, R8
  4167. LEAQ (DX)(CX*1), R9
  4168. LEAQ (DX)(BP*1), BP
  4169. XORL R11, R11
  // With fewer than 8 bytes left, compare byte by byte.
  4170. CMPL R8, $0x08
  4171. JL matchlen_single_repeat_extend
  // Counts in R11 how many bytes at R9 and BP are equal, limited by R8.
  // 8-byte step: XOR the two 8-byte words; a zero result means all 8 match.
  4172. matchlen_loopback_repeat_extend:
  4173. MOVQ (R9)(R11*1), R10
  4174. XORQ (BP)(R11*1), R10
  4175. TESTQ R10, R10
  4176. JZ matchlen_loop_repeat_extend
  // Index of the lowest differing bit, divided by 8, is the number of equal
  // leading bytes in this word; add it to R11 and finish.
  4177. BSFQ R10, R10
  4178. SARQ $0x03, R10
  4179. LEAL (R11)(R10*1), R11
  4180. JMP repeat_extend_forward_end_encodeBlockAsm8B
  // All 8 bytes matched: consume them and continue while at least 8 remain.
  4181. matchlen_loop_repeat_extend:
  4182. LEAL -8(R8), R8
  4183. LEAL 8(R11), R11
  4184. CMPL R8, $0x08
  4185. JGE matchlen_loopback_repeat_extend
  // Byte-wise tail for the final 0-7 bytes.
  4186. matchlen_single_repeat_extend:
  4187. TESTL R8, R8
  4188. JZ repeat_extend_forward_end_encodeBlockAsm8B
  4189. matchlen_single_loopback_repeat_extend:
  4190. MOVB (R9)(R11*1), R10
  4191. CMPB (BP)(R11*1), R10
  4192. JNE repeat_extend_forward_end_encodeBlockAsm8B
  4193. LEAL 1(R11), R11
  4194. DECL R8
  4195. JNZ matchlen_single_loopback_repeat_extend
  // Forward extension done: advance CX by the R11 matched bytes.
  // BP = CX - SI is the total repeat length (SI = repeat start after the
  // backward extension); SI is then reloaded with the offset in 16(SP).
  // DI still holds the 12(SP) value read when the repeat was detected in the
  // search loop; when it is zero the repeat is emitted as a copy instead.
  4196. repeat_extend_forward_end_encodeBlockAsm8B:
  4197. ADDL R11, CX
  4198. MOVL CX, BP
  4199. SUBL SI, BP
  4200. MOVL 16(SP), SI
  4201. TESTL DI, DI
  4202. JZ repeat_as_copy_encodeBlockAsm8B
  // Selects the encoding for a repeat of length BP with offset SI.
  // DI = length, BP = length - 4. All compares are signed.
  //   length <= 8                      -> repeat_two
  //   length < 12 and offset < 0x800   -> repeat_two_offset
  //   BP < 0x104                       -> repeat_three
  //   BP < 0x10100                     -> repeat_four
  //   BP < 0x100ffff                   -> repeat_five
  // Otherwise a maximal five-byte element (1d 00 fb ff ff) is written,
  // 16842747 (0x0100fffb) is subtracted from BP, and selection runs again.
  4203. emit_repeat_again_match_repeat_encodeBlockAsm8B:
  4204. MOVL BP, DI
  4205. LEAL -4(BP), BP
  4206. CMPL DI, $0x08
  4207. JLE repeat_two_match_repeat_encodeBlockAsm8B
  4208. CMPL DI, $0x0c
  4209. JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
  4210. CMPL SI, $0x00000800
  4211. JLT repeat_two_offset_match_repeat_encodeBlockAsm8B
  4212. cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
  4213. CMPL BP, $0x00000104
  4214. JLT repeat_three_match_repeat_encodeBlockAsm8B
  4215. CMPL BP, $0x00010100
  4216. JLT repeat_four_match_repeat_encodeBlockAsm8B
  4217. CMPL BP, $0x0100ffff
  4218. JLT repeat_five_match_repeat_encodeBlockAsm8B
  4219. LEAL -16842747(BP), BP
  4220. MOVW $0x001d, (AX)
  4221. MOVW $0xfffb, 2(AX)
  4222. MOVB $0xff, 4(AX)
  4223. ADDQ $0x05, AX
  4224. JMP emit_repeat_again_match_repeat_encodeBlockAsm8B
  // Repeat encoders; on entry BP = length - 4 and SI = offset. Each ends with
  // a jump to repeat_end_emit_encodeBlockAsm8B.
  //
  // Five-byte form: BP -= 65536, then write 0x1d 0x00 followed by the low
  // 24 bits of BP (little-endian); AX += 5.
  4225. repeat_five_match_repeat_encodeBlockAsm8B:
  4226. LEAL -65536(BP), BP
  4227. MOVL BP, SI
  4228. MOVW $0x001d, (AX)
  4229. MOVW BP, 2(AX)
  // Shift bits 16..23 into the low byte so they can be stored as byte 4.
  4230. SARL $0x10, SI
  4231. MOVB SI, 4(AX)
  4232. ADDQ $0x05, AX
  4233. JMP repeat_end_emit_encodeBlockAsm8B
  // Four-byte form: BP -= 256, then write 0x19 0x00 and the low 16 bits of BP.
  4234. repeat_four_match_repeat_encodeBlockAsm8B:
  4235. LEAL -256(BP), BP
  4236. MOVW $0x0019, (AX)
  4237. MOVW BP, 2(AX)
  4238. ADDQ $0x04, AX
  4239. JMP repeat_end_emit_encodeBlockAsm8B
  // Three-byte form: BP -= 4, then write 0x15 0x00 and the low byte of BP.
  4240. repeat_three_match_repeat_encodeBlockAsm8B:
  4241. LEAL -4(BP), BP
  4242. MOVW $0x0015, (AX)
  4243. MOVB BP, 2(AX)
  4244. ADDQ $0x03, AX
  4245. JMP repeat_end_emit_encodeBlockAsm8B
  // Two-byte form: write the low 16 bits of (BP << 2) | 1.
  4246. repeat_two_match_repeat_encodeBlockAsm8B:
  4247. SHLL $0x02, BP
  4248. ORL $0x01, BP
  4249. MOVW BP, (AX)
  4250. ADDQ $0x02, AX
  4251. JMP repeat_end_emit_encodeBlockAsm8B
  // Two-byte form that also carries the offset:
  //   byte 0 = low byte of (BP*4 + 1) | ((SI >> 8) << 5)
  //   byte 1 = low byte of SI
  // DI is zeroed only so that the LEAL evaluates to 1 + BP*4.
  4252. repeat_two_offset_match_repeat_encodeBlockAsm8B:
  4253. XORQ DI, DI
  4254. LEAL 1(DI)(BP*4), BP
  4255. MOVB SI, 1(AX)
  4256. SARL $0x08, SI
  4257. SHLL $0x05, SI
  4258. ORL SI, BP
  4259. MOVB BP, (AX)
  4260. ADDQ $0x02, AX
  4261. JMP repeat_end_emit_encodeBlockAsm8B
  // Emit the repeat as a copy: BP = length, SI = offset.
  // Offsets below 0x10000 use the two-byte-offset path.
  4262. repeat_as_copy_encodeBlockAsm8B:
  4263. CMPL SI, $0x00010000
  4264. JL two_byte_offset_repeat_as_copy_encodeBlockAsm8B
  // Four-byte-offset path. For lengths above 64, write 0xff plus the 32-bit
  // offset (5 bytes) and subtract 64 from the length. If fewer than 4 remain,
  // finish in four_bytes_remain; otherwise fall through to the code below,
  // which encodes the rest as a repeat.
  4265. four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8B:
  4266. CMPL BP, $0x40
  4267. JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm8B
  4268. MOVB $0xff, (AX)
  4269. MOVL SI, 1(AX)
  4270. LEAL -64(BP), BP
  4271. ADDQ $0x05, AX
  4272. CMPL BP, $0x04
  4273. JL four_bytes_remain_repeat_as_copy_encodeBlockAsm8B
  // Selects the repeat encoding for the length left in BP (offset in SI).
  // DI = length, BP = length - 4. All compares are signed.
  //   length <= 8                      -> repeat_two
  //   length < 12 and offset < 0x800   -> repeat_two_offset
  //   BP < 0x104                       -> repeat_three
  //   BP < 0x10100                     -> repeat_four
  //   BP < 0x100ffff                   -> repeat_five
  // Otherwise a maximal five-byte element (1d 00 fb ff ff) is written,
  // 16842747 (0x0100fffb) is subtracted from BP, and selection runs again.
  4274. emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy:
  4275. MOVL BP, DI
  4276. LEAL -4(BP), BP
  4277. CMPL DI, $0x08
  4278. JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy
  4279. CMPL DI, $0x0c
  4280. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy
  4281. CMPL SI, $0x00000800
  4282. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy
  4283. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy:
  4284. CMPL BP, $0x00000104
  4285. JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy
  4286. CMPL BP, $0x00010100
  4287. JLT repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy
  4288. CMPL BP, $0x0100ffff
  4289. JLT repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy
  4290. LEAL -16842747(BP), BP
  4291. MOVW $0x001d, (AX)
  4292. MOVW $0xfffb, 2(AX)
  4293. MOVB $0xff, 4(AX)
  4294. ADDQ $0x05, AX
  4295. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy
  // Repeat encoders; on entry BP = length - 4 and SI = offset. Each ends with
  // a jump to repeat_end_emit_encodeBlockAsm8B.
  //
  // Five-byte form: BP -= 65536, then write 0x1d 0x00 followed by the low
  // 24 bits of BP (little-endian); AX += 5.
  4296. repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy:
  4297. LEAL -65536(BP), BP
  4298. MOVL BP, SI
  4299. MOVW $0x001d, (AX)
  4300. MOVW BP, 2(AX)
  // Shift bits 16..23 into the low byte so they can be stored as byte 4.
  4301. SARL $0x10, SI
  4302. MOVB SI, 4(AX)
  4303. ADDQ $0x05, AX
  4304. JMP repeat_end_emit_encodeBlockAsm8B
  // Four-byte form: BP -= 256, then write 0x19 0x00 and the low 16 bits of BP.
  4305. repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy:
  4306. LEAL -256(BP), BP
  4307. MOVW $0x0019, (AX)
  4308. MOVW BP, 2(AX)
  4309. ADDQ $0x04, AX
  4310. JMP repeat_end_emit_encodeBlockAsm8B
  // Three-byte form: BP -= 4, then write 0x15 0x00 and the low byte of BP.
  4311. repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy:
  4312. LEAL -4(BP), BP
  4313. MOVW $0x0015, (AX)
  4314. MOVB BP, 2(AX)
  4315. ADDQ $0x03, AX
  4316. JMP repeat_end_emit_encodeBlockAsm8B
  // Two-byte form: write the low 16 bits of (BP << 2) | 1.
  4317. repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy:
  4318. SHLL $0x02, BP
  4319. ORL $0x01, BP
  4320. MOVW BP, (AX)
  4321. ADDQ $0x02, AX
  4322. JMP repeat_end_emit_encodeBlockAsm8B
  // Two-byte form that also carries the offset:
  //   byte 0 = low byte of (BP*4 + 1) | ((SI >> 8) << 5)
  //   byte 1 = low byte of SI
  // DI is zeroed only so that the LEAL evaluates to 1 + BP*4.
  4323. repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy:
  4324. XORQ DI, DI
  4325. LEAL 1(DI)(BP*4), BP
  4326. MOVB SI, 1(AX)
  4327. SARL $0x08, SI
  4328. SHLL $0x05, SI
  4329. ORL SI, BP
  4330. MOVB BP, (AX)
  4331. ADDQ $0x02, AX
  4332. JMP repeat_end_emit_encodeBlockAsm8B
  // Not reachable: this instruction follows an unconditional JMP and has no label.
  4333. JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8B
  // Final copy element with a four-byte offset. Nothing is written when the
  // remaining length BP is zero. Otherwise:
  //   byte 0    = low byte of (3 - 4 + BP*4)
  //   bytes 1-4 = SI (32-bit offset)
  // Only BL is loaded; since just the low byte of BP is stored, the upper
  // bits of BX cannot influence the output.
  4334. four_bytes_remain_repeat_as_copy_encodeBlockAsm8B:
  4335. TESTL BP, BP
  4336. JZ repeat_end_emit_encodeBlockAsm8B
  4337. MOVB $0x03, BL
  4338. LEAL -4(BX)(BP*4), BP
  4339. MOVB BP, (AX)
  4340. MOVL SI, 1(AX)
  4341. ADDQ $0x05, AX
  4342. JMP repeat_end_emit_encodeBlockAsm8B
  // Copy with a two-byte offset (SI < 0x10000). Lengths up to 64 go to the
  // short path. Longer ones first write 0xee plus the 16-bit offset (3 bytes),
  // subtract 60 from the length, and fall through to the code below, which
  // encodes the rest as a repeat.
  4343. two_byte_offset_repeat_as_copy_encodeBlockAsm8B:
  4344. CMPL BP, $0x40
  4345. JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
  4346. MOVB $0xee, (AX)
  4347. MOVW SI, 1(AX)
  4348. LEAL -60(BP), BP
  4349. ADDQ $0x03, AX
  // Selects the repeat encoding for the length left in BP (offset in SI).
  // DI = length, BP = length - 4. All compares are signed.
  //   length <= 8                      -> repeat_two
  //   length < 12 and offset < 0x800   -> repeat_two_offset
  //   BP < 0x104                       -> repeat_three
  //   BP < 0x10100                     -> repeat_four
  //   BP < 0x100ffff                   -> repeat_five
  // Otherwise a maximal five-byte element (1d 00 fb ff ff) is written,
  // 16842747 (0x0100fffb) is subtracted from BP, and selection runs again.
  4350. emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  4351. MOVL BP, DI
  4352. LEAL -4(BP), BP
  4353. CMPL DI, $0x08
  4354. JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  4355. CMPL DI, $0x0c
  4356. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  4357. CMPL SI, $0x00000800
  4358. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  4359. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  4360. CMPL BP, $0x00000104
  4361. JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  4362. CMPL BP, $0x00010100
  4363. JLT repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  4364. CMPL BP, $0x0100ffff
  4365. JLT repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  4366. LEAL -16842747(BP), BP
  4367. MOVW $0x001d, (AX)
  4368. MOVW $0xfffb, 2(AX)
  4369. MOVB $0xff, 4(AX)
  4370. ADDQ $0x05, AX
  4371. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  // Repeat encoders; on entry BP = length - 4 and SI = offset. Each ends with
  // a jump to repeat_end_emit_encodeBlockAsm8B.
  //
  // Five-byte form: BP -= 65536, then write 0x1d 0x00 followed by the low
  // 24 bits of BP (little-endian); AX += 5.
  4372. repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  4373. LEAL -65536(BP), BP
  4374. MOVL BP, SI
  4375. MOVW $0x001d, (AX)
  4376. MOVW BP, 2(AX)
  // Shift bits 16..23 into the low byte so they can be stored as byte 4.
  4377. SARL $0x10, SI
  4378. MOVB SI, 4(AX)
  4379. ADDQ $0x05, AX
  4380. JMP repeat_end_emit_encodeBlockAsm8B
  // Four-byte form: BP -= 256, then write 0x19 0x00 and the low 16 bits of BP.
  4381. repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  4382. LEAL -256(BP), BP
  4383. MOVW $0x0019, (AX)
  4384. MOVW BP, 2(AX)
  4385. ADDQ $0x04, AX
  4386. JMP repeat_end_emit_encodeBlockAsm8B
  // Three-byte form: BP -= 4, then write 0x15 0x00 and the low byte of BP.
  4387. repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  4388. LEAL -4(BP), BP
  4389. MOVW $0x0015, (AX)
  4390. MOVB BP, 2(AX)
  4391. ADDQ $0x03, AX
  4392. JMP repeat_end_emit_encodeBlockAsm8B
  // Two-byte form: write the low 16 bits of (BP << 2) | 1.
  4393. repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  4394. SHLL $0x02, BP
  4395. ORL $0x01, BP
  4396. MOVW BP, (AX)
  4397. ADDQ $0x02, AX
  4398. JMP repeat_end_emit_encodeBlockAsm8B
  // Two-byte form that also carries the offset:
  //   byte 0 = low byte of (BP*4 + 1) | ((SI >> 8) << 5)
  //   byte 1 = low byte of SI
  // DI is zeroed only so that the LEAL evaluates to 1 + BP*4.
  4399. repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  4400. XORQ DI, DI
  4401. LEAL 1(DI)(BP*4), BP
  4402. MOVB SI, 1(AX)
  4403. SARL $0x08, SI
  4404. SHLL $0x05, SI
  4405. ORL SI, BP
  4406. MOVB BP, (AX)
  4407. ADDQ $0x02, AX
  4408. JMP repeat_end_emit_encodeBlockAsm8B
  // Not reachable: this instruction follows an unconditional JMP and has no label.
  4409. JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B
  // Short copy (length BP <= 64, offset SI < 0x10000). The three-byte form is
  // used when BP >= 12 or SI >= 0x800 (signed compares).
  4410. two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
  4411. CMPL BP, $0x0c
  4412. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
  4413. CMPL SI, $0x00000800
  4414. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
  // Two-byte form:
  //   byte 0 = low byte of (1 - 16 + BP*4) | ((SI >> 8) << 5)
  //   byte 1 = low byte of SI
  // Only BL is loaded; since just the low byte of BP is stored, the upper
  // bits of BX cannot influence the output.
  4415. MOVB $0x01, BL
  4416. LEAL -16(BX)(BP*4), BP
  4417. MOVB SI, 1(AX)
  4418. SHRL $0x08, SI
  4419. SHLL $0x05, SI
  4420. ORL SI, BP
  4421. MOVB BP, (AX)
  4422. ADDQ $0x02, AX
  4423. JMP repeat_end_emit_encodeBlockAsm8B
  // Three-byte form: byte 0 = low byte of (2 - 4 + BP*4), bytes 1-2 = low
  // 16 bits of SI. AX += 3, then execution falls through to the next label.
  4424. emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
  4425. MOVB $0x02, BL
  4426. LEAL -4(BX)(BP*4), BP
  4427. MOVB BP, (AX)
  4428. MOVW SI, 1(AX)
  4429. ADDQ $0x03, AX
  // Repeat fully emitted: store the current position CX in 12(SP). If CX has
  // reached the limit in 8(SP) (signed compare) emit the remainder, otherwise
  // resume searching at CX.
  4430. repeat_end_emit_encodeBlockAsm8B:
  4431. MOVL CX, 12(SP)
  4432. CMPL CX, 8(SP)
  4433. JGE emit_remainder_encodeBlockAsm8B
  4434. JMP search_loop_encodeBlockAsm8B
  // No repeat at CX+1: test the table candidates gathered by the search loop.
  // On entry SI = 8 source bytes at CX, BP = table candidate for CX,
  // DI = table candidate for CX+1, R9 = hash of the bytes at CX+2.
  4435. no_repeat_found_encodeBlockAsm8B:
  // Candidate 1: 4 bytes at BP against the 4 bytes at CX.
  4436. CMPL (DX)(BP*1), SI
  4437. JEQ candidate_match_encodeBlockAsm8B
  // Candidate 2: shift SI to the bytes at CX+1 and compare with the bytes at
  // DI. BP is reloaded with the table candidate for CX+2; R8 = CX+2.
  4438. SHRQ $0x08, SI
  4439. MOVL 24(SP)(R9*4), BP
  4440. LEAL 2(CX), R8
  4441. CMPL (DX)(DI*1), SI
  4442. JEQ candidate2_match_encodeBlockAsm8B
  // Candidate 3: record CX+2 in the table, shift SI to the bytes at CX+2 and
  // compare with the bytes at BP.
  4443. MOVL R8, 24(SP)(R9*4)
  4444. SHRQ $0x08, SI
  4445. CMPL (DX)(BP*1), SI
  4446. JEQ candidate3_match_encodeBlockAsm8B
  // No candidate matched: continue at the position saved in 20(SP).
  4447. MOVL 20(SP), CX
  4448. JMP search_loop_encodeBlockAsm8B
  // Match at CX+2: advance CX by 2; BP already holds the candidate.
  4449. candidate3_match_encodeBlockAsm8B:
  4450. ADDL $0x02, CX
  4451. JMP candidate_match_encodeBlockAsm8B
  // Match at CX+1: record CX+2 in the table, advance CX by 1, use DI as the
  // candidate, then fall through to candidate_match.
  4452. candidate2_match_encodeBlockAsm8B:
  4453. MOVL R8, 24(SP)(R9*4)
  4454. INCL CX
  4455. MOVL DI, BP
  // A match was found: CX = current position, BP = candidate position.
  // SI = 12(SP) is the lower bound for backward extension; a candidate at
  // position 0 cannot be extended.
  4456. candidate_match_encodeBlockAsm8B:
  4457. MOVL 12(SP), SI
  4458. TESTL BP, BP
  4459. JZ match_extend_back_end_encodeBlockAsm8B
  // Extend backwards: while CX > SI and the byte before CX equals the byte
  // before BP, decrement both. Also stops when BP reaches 0.
  4460. match_extend_back_loop_encodeBlockAsm8B:
  4461. CMPL CX, SI
  4462. JLE match_extend_back_end_encodeBlockAsm8B
  4463. MOVB -1(DX)(BP*1), BL
  4464. MOVB -1(DX)(CX*1), DI
  4465. CMPB BL, DI
  4466. JNE match_extend_back_end_encodeBlockAsm8B
  4467. LEAL -1(CX), CX
  4468. DECL BP
  4469. JZ match_extend_back_end_encodeBlockAsm8B
  4470. JMP match_extend_back_loop_encodeBlockAsm8B
  // Output-space check before emitting: SI = AX + (CX - 12(SP)) + 4.
  // If that is not below the bound stored at (SP) (signed compare), return 0.
  4471. match_extend_back_end_encodeBlockAsm8B:
  4472. MOVL CX, SI
  4473. SUBL 12(SP), SI
  4474. LEAQ 4(AX)(SI*1), SI
  4475. CMPQ SI, (SP)
  4476. JL match_dst_size_check_encodeBlockAsm8B
  4477. MOVQ $0x00000000, ret+48(FP)
  4478. RET
  // Writes the header for the bytes between 12(SP) and CX (the match start),
  // then falls through to the copy routine that follows.
  // On exit: SI = DX + old 12(SP) (copy source), R8 = byte count, 12(SP) = CX.
  4479. match_dst_size_check_encodeBlockAsm8B:
  4480. MOVL CX, SI
  4481. MOVL 12(SP), DI
  // Nothing to emit when the match starts exactly at 12(SP).
  4482. CMPL DI, SI
  4483. JEQ emit_literal_done_match_emit_encodeBlockAsm8B
  4484. MOVL SI, R8
  4485. MOVL SI, 12(SP)
  4486. LEAQ (DX)(DI*1), SI
  4487. SUBL DI, R8
  // DI = count - 1; the borrow (carry) is set only if the count was zero.
  4488. MOVL R8, DI
  4489. SUBL $0x01, DI
  4490. JC emit_literal_done_match_emit_encodeBlockAsm8B
  // Header size by count-1 (signed compares): < 60 one byte; < 2^8 0xf0 + 1;
  // < 2^16 0xf4 + 2; < 2^24 0xf8 + 3; otherwise 0xfc + 4 bytes.
  4491. CMPL DI, $0x3c
  4492. JLT one_byte_match_emit_encodeBlockAsm8B
  4493. CMPL DI, $0x00000100
  4494. JLT two_bytes_match_emit_encodeBlockAsm8B
  4495. CMPL DI, $0x00010000
  4496. JLT three_bytes_match_emit_encodeBlockAsm8B
  4497. CMPL DI, $0x01000000
  4498. JLT four_bytes_match_emit_encodeBlockAsm8B
  4499. MOVB $0xfc, (AX)
  4500. MOVL DI, 1(AX)
  4501. ADDQ $0x05, AX
  4502. JMP memmove_match_emit_encodeBlockAsm8B
  4503. four_bytes_match_emit_encodeBlockAsm8B:
  // R9 = bits 16..23 of count-1, stored as the fourth header byte.
  4504. MOVL DI, R9
  4505. SHRL $0x10, R9
  4506. MOVB $0xf8, (AX)
  4507. MOVW DI, 1(AX)
  4508. MOVB R9, 3(AX)
  4509. ADDQ $0x04, AX
  4510. JMP memmove_match_emit_encodeBlockAsm8B
  4511. three_bytes_match_emit_encodeBlockAsm8B:
  4512. MOVB $0xf4, (AX)
  4513. MOVW DI, 1(AX)
  4514. ADDQ $0x03, AX
  4515. JMP memmove_match_emit_encodeBlockAsm8B
  4516. two_bytes_match_emit_encodeBlockAsm8B:
  4517. MOVB $0xf0, (AX)
  4518. MOVB DI, 1(AX)
  4519. ADDQ $0x02, AX
  4520. JMP memmove_match_emit_encodeBlockAsm8B
  // Single header byte: (count-1) << 2.
  4521. one_byte_match_emit_encodeBlockAsm8B:
  4522. SHLB $0x02, DI
  4523. MOVB DI, (AX)
  4524. ADDQ $0x01, AX
  // Copies R8 bytes from SI to AX. DI = AX + R8 is the address just past the
  // copied bytes.
  4525. memmove_match_emit_encodeBlockAsm8B:
  4526. LEAQ (AX)(R8*1), DI
  4527. NOP
  // Size dispatch (unsigned compares): 0, 1-2, 3, 4, 5-7, 8, 9-16, 17-32,
  // 33-64, 65-128, 129-256, and more than 256 bytes.
  4528. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_tail:
  4529. TESTQ R8, R8
  4530. JEQ memmove_end_copy_match_emit_encodeBlockAsm8B
  4531. CMPQ R8, $0x02
  4532. JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2
  4533. CMPQ R8, $0x04
  // Both branches use the flags of the single compare: below 4 is 3, else equal is 4.
  4534. JB emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3
  4535. JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4
  4536. CMPQ R8, $0x08
  4537. JB emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_5through7
  4538. JE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
  4539. CMPQ R8, $0x10
  4540. JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_9through16
  4541. CMPQ R8, $0x20
  4542. JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
  4543. CMPQ R8, $0x40
  4544. JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
  4545. CMPQ R8, $0x80
  4546. JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_65through128
  4547. CMPQ R8, $0x00000100
  4548. JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_129through256
  4549. JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_256through2048
  4550. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2:
  4551. MOVB (SI), R9
  4552. MOVB -1(SI)(R8*1), SI
  4553. MOVB R9, (AX)
  4554. MOVB SI, -1(AX)(R8*1)
  4555. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4556. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4:
  4557. MOVL (SI), R9
  4558. MOVL R9, (AX)
  4559. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4560. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3:
  4561. MOVW (SI), R9
  4562. MOVB 2(SI), SI
  4563. MOVW R9, (AX)
  4564. MOVB SI, 2(AX)
  4565. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4566. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_5through7:
  4567. MOVL (SI), R9
  4568. MOVL -4(SI)(R8*1), SI
  4569. MOVL R9, (AX)
  4570. MOVL SI, -4(AX)(R8*1)
  4571. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4572. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
  4573. MOVQ (SI), R9
  4574. MOVQ R9, (AX)
  4575. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4576. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_9through16:
  4577. MOVQ (SI), R9
  4578. MOVQ -8(SI)(R8*1), SI
  4579. MOVQ R9, (AX)
  4580. MOVQ SI, -8(AX)(R8*1)
  4581. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4582. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
  4583. MOVOU (SI), X0
  4584. MOVOU -16(SI)(R8*1), X1
  4585. MOVOU X0, (AX)
  4586. MOVOU X1, -16(AX)(R8*1)
  4587. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4588. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
  4589. MOVOU (SI), X0
  4590. MOVOU 16(SI), X1
  4591. MOVOU -32(SI)(R8*1), X2
  4592. MOVOU -16(SI)(R8*1), X3
  4593. MOVOU X0, (AX)
  4594. MOVOU X1, 16(AX)
  4595. MOVOU X2, -32(AX)(R8*1)
  4596. MOVOU X3, -16(AX)(R8*1)
  4597. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4598. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_65through128:
  4599. MOVOU (SI), X0
  4600. MOVOU 16(SI), X1
  4601. MOVOU 32(SI), X2
  4602. MOVOU 48(SI), X3
  4603. MOVOU -64(SI)(R8*1), X12
  4604. MOVOU -48(SI)(R8*1), X13
  4605. MOVOU -32(SI)(R8*1), X14
  4606. MOVOU -16(SI)(R8*1), X15
  4607. MOVOU X0, (AX)
  4608. MOVOU X1, 16(AX)
  4609. MOVOU X2, 32(AX)
  4610. MOVOU X3, 48(AX)
  4611. MOVOU X12, -64(AX)(R8*1)
  4612. MOVOU X13, -48(AX)(R8*1)
  4613. MOVOU X14, -32(AX)(R8*1)
  4614. MOVOU X15, -16(AX)(R8*1)
  4615. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4616. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_129through256:
  4617. MOVOU (SI), X0
  4618. MOVOU 16(SI), X1
  4619. MOVOU 32(SI), X2
  4620. MOVOU 48(SI), X3
  4621. MOVOU 64(SI), X4
  4622. MOVOU 80(SI), X5
  4623. MOVOU 96(SI), X6
  4624. MOVOU 112(SI), X7
  4625. MOVOU -128(SI)(R8*1), X8
  4626. MOVOU -112(SI)(R8*1), X9
  4627. MOVOU -96(SI)(R8*1), X10
  4628. MOVOU -80(SI)(R8*1), X11
  4629. MOVOU -64(SI)(R8*1), X12
  4630. MOVOU -48(SI)(R8*1), X13
  4631. MOVOU -32(SI)(R8*1), X14
  4632. MOVOU -16(SI)(R8*1), X15
  4633. MOVOU X0, (AX)
  4634. MOVOU X1, 16(AX)
  4635. MOVOU X2, 32(AX)
  4636. MOVOU X3, 48(AX)
  4637. MOVOU X4, 64(AX)
  4638. MOVOU X5, 80(AX)
  4639. MOVOU X6, 96(AX)
  4640. MOVOU X7, 112(AX)
  4641. MOVOU X8, -128(AX)(R8*1)
  4642. MOVOU X9, -112(AX)(R8*1)
  4643. MOVOU X10, -96(AX)(R8*1)
  4644. MOVOU X11, -80(AX)(R8*1)
  4645. MOVOU X12, -64(AX)(R8*1)
  4646. MOVOU X13, -48(AX)(R8*1)
  4647. MOVOU X14, -32(AX)(R8*1)
  4648. MOVOU X15, -16(AX)(R8*1)
  4649. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4650. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_256through2048:
  4651. LEAQ -256(R8), R8
  4652. MOVOU (SI), X0
  4653. MOVOU 16(SI), X1
  4654. MOVOU 32(SI), X2
  4655. MOVOU 48(SI), X3
  4656. MOVOU 64(SI), X4
  4657. MOVOU 80(SI), X5
  4658. MOVOU 96(SI), X6
  4659. MOVOU 112(SI), X7
  4660. MOVOU 128(SI), X8
  4661. MOVOU 144(SI), X9
  4662. MOVOU 160(SI), X10
  4663. MOVOU 176(SI), X11
  4664. MOVOU 192(SI), X12
  4665. MOVOU 208(SI), X13
  4666. MOVOU 224(SI), X14
  4667. MOVOU 240(SI), X15
  4668. MOVOU X0, (AX)
  4669. MOVOU X1, 16(AX)
  4670. MOVOU X2, 32(AX)
  4671. MOVOU X3, 48(AX)
  4672. MOVOU X4, 64(AX)
  4673. MOVOU X5, 80(AX)
  4674. MOVOU X6, 96(AX)
  4675. MOVOU X7, 112(AX)
  4676. MOVOU X8, 128(AX)
  4677. MOVOU X9, 144(AX)
  4678. MOVOU X10, 160(AX)
  4679. MOVOU X11, 176(AX)
  4680. MOVOU X12, 192(AX)
  4681. MOVOU X13, 208(AX)
  4682. MOVOU X14, 224(AX)
  4683. MOVOU X15, 240(AX)
  4684. CMPQ R8, $0x00000100
  4685. LEAQ 256(SI), SI
  4686. LEAQ 256(AX), AX
  4687. JGE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_256through2048
  4688. JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_tail
  4689. memmove_end_copy_match_emit_encodeBlockAsm8B:
  4690. MOVQ DI, AX
  4691. emit_literal_done_match_emit_encodeBlockAsm8B:
  4692. match_nolit_loop_encodeBlockAsm8B: // Measure how far the bytes at CX keep matching the bytes at BP, then pick the copy encoding by offset size.
  4693. MOVL CX, SI
  4694. SUBL BP, SI
  4695. MOVL SI, 16(SP) // 16(SP) = CX - BP, the distance between the two positions
  4696. ADDL $0x04, CX // step both positions past the first 4 bytes
  4697. ADDL $0x04, BP // (presumably already verified equal by the code that jumps here)
  4698. MOVQ src_len+32(FP), SI
  4699. SUBL CX, SI // SI = src_len - CX: bytes left to compare
  4700. LEAQ (DX)(CX*1), DI // DI = address of the current position
  4701. LEAQ (DX)(BP*1), BP // BP = address of the earlier position
  4702. XORL R9, R9 // R9 = number of matching bytes found so far
  4703. CMPL SI, $0x08
  4704. JL matchlen_single_match_nolit_encodeBlockAsm8B // fewer than 8 bytes left: compare byte by byte
  4705. matchlen_loopback_match_nolit_encodeBlockAsm8B: // compare 8 bytes per iteration
  4706. MOVQ (DI)(R9*1), R8
  4707. XORQ (BP)(R9*1), R8 // R8 != 0 iff the two 8-byte words differ
  4708. TESTQ R8, R8
  4709. JZ matchlen_loop_match_nolit_encodeBlockAsm8B // all 8 bytes equal
  4710. BSFQ R8, R8 // index of the lowest differing bit
  4711. SARQ $0x03, R8 // bit index / 8 = number of equal leading bytes
  4712. LEAL (R9)(R8*1), R9 // add them to the running length
  4713. JMP match_nolit_end_encodeBlockAsm8B
  4714. matchlen_loop_match_nolit_encodeBlockAsm8B:
  4715. LEAL -8(SI), SI // 8 fewer bytes remain
  4716. LEAL 8(R9), R9 // 8 more bytes matched
  4717. CMPL SI, $0x08
  4718. JGE matchlen_loopback_match_nolit_encodeBlockAsm8B // another full word is available
  4719. matchlen_single_match_nolit_encodeBlockAsm8B: // tail: 0..7 bytes left
  4720. TESTL SI, SI
  4721. JZ match_nolit_end_encodeBlockAsm8B // nothing left to compare
  4722. matchlen_single_loopback_match_nolit_encodeBlockAsm8B:
  4723. MOVB (DI)(R9*1), R8
  4724. CMPB (BP)(R9*1), R8
  4725. JNE match_nolit_end_encodeBlockAsm8B // first mismatch ends the match
  4726. LEAL 1(R9), R9
  4727. DECL SI
  4728. JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm8B // loop until the remaining count hits 0
  4729. match_nolit_end_encodeBlockAsm8B:
  4730. ADDL R9, CX // advance the current position past the matched bytes
  4731. MOVL 16(SP), BP // BP = distance saved above (the copy offset)
  4732. ADDL $0x04, R9 // R9 = total match length, including the 4 bytes skipped at the top
  4733. CMPL BP, $0x00010000
  4734. JL two_byte_offset_match_nolit_encodeBlockAsm8B // offset < 65536 fits in 2 bytes; otherwise fall into the 4-byte-offset path
  4735. four_bytes_loop_back_match_nolit_encodeBlockAsm8B:
  4736. CMPL R9, $0x40
  4737. JLE four_bytes_remain_match_nolit_encodeBlockAsm8B
  4738. MOVB $0xff, (AX)
  4739. MOVL BP, 1(AX)
  4740. LEAL -64(R9), R9
  4741. ADDQ $0x05, AX
  4742. CMPL R9, $0x04
  4743. JL four_bytes_remain_match_nolit_encodeBlockAsm8B
  4744. emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy:
  4745. MOVL R9, SI
  4746. LEAL -4(R9), R9
  4747. CMPL SI, $0x08
  4748. JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy
  4749. CMPL SI, $0x0c
  4750. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy
  4751. CMPL BP, $0x00000800
  4752. JLT repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy
  4753. cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy:
  4754. CMPL R9, $0x00000104
  4755. JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy
  4756. CMPL R9, $0x00010100
  4757. JLT repeat_four_match_nolit_encodeBlockAsm8B_emit_copy
  4758. CMPL R9, $0x0100ffff
  4759. JLT repeat_five_match_nolit_encodeBlockAsm8B_emit_copy
  4760. LEAL -16842747(R9), R9
  4761. MOVW $0x001d, (AX)
  4762. MOVW $0xfffb, 2(AX)
  4763. MOVB $0xff, 4(AX)
  4764. ADDQ $0x05, AX
  4765. JMP emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy
  4766. repeat_five_match_nolit_encodeBlockAsm8B_emit_copy:
  4767. LEAL -65536(R9), R9
  4768. MOVL R9, BP
  4769. MOVW $0x001d, (AX)
  4770. MOVW R9, 2(AX)
  4771. SARL $0x10, BP
  4772. MOVB BP, 4(AX)
  4773. ADDQ $0x05, AX
  4774. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4775. repeat_four_match_nolit_encodeBlockAsm8B_emit_copy:
  4776. LEAL -256(R9), R9
  4777. MOVW $0x0019, (AX)
  4778. MOVW R9, 2(AX)
  4779. ADDQ $0x04, AX
  4780. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4781. repeat_three_match_nolit_encodeBlockAsm8B_emit_copy:
  4782. LEAL -4(R9), R9
  4783. MOVW $0x0015, (AX)
  4784. MOVB R9, 2(AX)
  4785. ADDQ $0x03, AX
  4786. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4787. repeat_two_match_nolit_encodeBlockAsm8B_emit_copy:
  4788. SHLL $0x02, R9
  4789. ORL $0x01, R9
  4790. MOVW R9, (AX)
  4791. ADDQ $0x02, AX
  4792. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4793. repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy:
  4794. XORQ SI, SI
  4795. LEAL 1(SI)(R9*4), R9
  4796. MOVB BP, 1(AX)
  4797. SARL $0x08, BP
  4798. SHLL $0x05, BP
  4799. ORL BP, R9
  4800. MOVB R9, (AX)
  4801. ADDQ $0x02, AX
  4802. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4803. JMP four_bytes_loop_back_match_nolit_encodeBlockAsm8B
  4804. four_bytes_remain_match_nolit_encodeBlockAsm8B:
  4805. TESTL R9, R9
  4806. JZ match_nolit_emitcopy_end_encodeBlockAsm8B
  4807. MOVB $0x03, BL
  4808. LEAL -4(BX)(R9*4), R9
  4809. MOVB R9, (AX)
  4810. MOVL BP, 1(AX)
  4811. ADDQ $0x05, AX
  4812. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4813. two_byte_offset_match_nolit_encodeBlockAsm8B:
  4814. CMPL R9, $0x40
  4815. JLE two_byte_offset_short_match_nolit_encodeBlockAsm8B
  4816. MOVB $0xee, (AX)
  4817. MOVW BP, 1(AX)
  4818. LEAL -60(R9), R9
  4819. ADDQ $0x03, AX
  4820. emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy_short:
  4821. MOVL R9, SI
  4822. LEAL -4(R9), R9
  4823. CMPL SI, $0x08
  4824. JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
  4825. CMPL SI, $0x0c
  4826. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
  4827. CMPL BP, $0x00000800
  4828. JLT repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
  4829. cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
  4830. CMPL R9, $0x00000104
  4831. JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
  4832. CMPL R9, $0x00010100
  4833. JLT repeat_four_match_nolit_encodeBlockAsm8B_emit_copy_short
  4834. CMPL R9, $0x0100ffff
  4835. JLT repeat_five_match_nolit_encodeBlockAsm8B_emit_copy_short
  4836. LEAL -16842747(R9), R9
  4837. MOVW $0x001d, (AX)
  4838. MOVW $0xfffb, 2(AX)
  4839. MOVB $0xff, 4(AX)
  4840. ADDQ $0x05, AX
  4841. JMP emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy_short
  4842. repeat_five_match_nolit_encodeBlockAsm8B_emit_copy_short:
  4843. LEAL -65536(R9), R9
  4844. MOVL R9, BP
  4845. MOVW $0x001d, (AX)
  4846. MOVW R9, 2(AX)
  4847. SARL $0x10, BP
  4848. MOVB BP, 4(AX)
  4849. ADDQ $0x05, AX
  4850. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4851. repeat_four_match_nolit_encodeBlockAsm8B_emit_copy_short:
  4852. LEAL -256(R9), R9
  4853. MOVW $0x0019, (AX)
  4854. MOVW R9, 2(AX)
  4855. ADDQ $0x04, AX
  4856. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4857. repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
  4858. LEAL -4(R9), R9
  4859. MOVW $0x0015, (AX)
  4860. MOVB R9, 2(AX)
  4861. ADDQ $0x03, AX
  4862. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4863. repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
  4864. SHLL $0x02, R9
  4865. ORL $0x01, R9
  4866. MOVW R9, (AX)
  4867. ADDQ $0x02, AX
  4868. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4869. repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
  4870. XORQ SI, SI
  4871. LEAL 1(SI)(R9*4), R9
  4872. MOVB BP, 1(AX)
  4873. SARL $0x08, BP
  4874. SHLL $0x05, BP
  4875. ORL BP, R9
  4876. MOVB R9, (AX)
  4877. ADDQ $0x02, AX
  4878. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4879. JMP two_byte_offset_match_nolit_encodeBlockAsm8B
  4880. two_byte_offset_short_match_nolit_encodeBlockAsm8B:
  4881. CMPL R9, $0x0c
  4882. JGE emit_copy_three_match_nolit_encodeBlockAsm8B
  4883. CMPL BP, $0x00000800
  4884. JGE emit_copy_three_match_nolit_encodeBlockAsm8B
  4885. MOVB $0x01, BL
  4886. LEAL -16(BX)(R9*4), R9
  4887. MOVB BP, 1(AX)
  4888. SHRL $0x08, BP
  4889. SHLL $0x05, BP
  4890. ORL BP, R9
  4891. MOVB R9, (AX)
  4892. ADDQ $0x02, AX
  4893. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  4894. emit_copy_three_match_nolit_encodeBlockAsm8B:
  4895. MOVB $0x02, BL
  4896. LEAL -4(BX)(R9*4), R9
  4897. MOVB R9, (AX)
  4898. MOVW BP, 1(AX)
  4899. ADDQ $0x03, AX
  4900. match_nolit_emitcopy_end_encodeBlockAsm8B: // After a copy was written: check limits, refresh the table, and test for an immediate follow-up match.
  4901. MOVL CX, 12(SP) // pending literal run now starts at CX
  4902. CMPL CX, 8(SP)
  4903. JGE emit_remainder_encodeBlockAsm8B // CX reached the limit kept in 8(SP): flush what is left
  4904. CMPQ AX, (SP)
  4905. JL match_nolit_dst_ok_encodeBlockAsm8B // AX still below the limit kept in (SP). NOTE(review): JL is a signed test on an address; JB is the unsigned form - confirm intent
  4906. MOVQ $0x00000000, ret+48(FP) // limit reached: return 0
  4907. RET
  4908. match_nolit_dst_ok_encodeBlockAsm8B:
  4909. MOVQ -2(DX)(CX*1), SI // load 8 bytes starting at position CX-2
  4910. MOVQ $0x9e3779b1, BP // hash multiplier
  4911. MOVQ SI, DI // DI = bytes from CX-2
  4912. SHRQ $0x10, SI // SI = bytes from CX (drop the two leading bytes)
  4913. MOVQ SI, R8
  4914. SHLQ $0x20, DI // keep only the low 4 bytes, moved to the top half
  4915. IMULQ BP, DI
  4916. SHRQ $0x38, DI // DI = top 8 bits of the product: table index for CX-2
  4917. SHLQ $0x20, R8 // same hash for the 4 bytes at CX
  4918. IMULQ BP, R8
  4919. SHRQ $0x38, R8 // R8 = table index for CX
  4920. LEAL -2(CX), R9 // R9 = CX-2
  4921. MOVL 24(SP)(R8*4), BP // BP = previous table entry for the bytes at CX (4-byte entries based at 24(SP))
  4922. MOVL R9, 24(SP)(DI*4) // record position CX-2
  4923. MOVL CX, 24(SP)(R8*4) // record position CX
  4924. CMPL (DX)(BP*1), SI // do the 4 bytes at the previous entry equal the 4 bytes at CX?
  4925. JEQ match_nolit_loop_encodeBlockAsm8B // yes: extend this new match right away
  4926. INCL CX // no: move on one byte
  4927. JMP search_loop_encodeBlockAsm8B
  4928. emit_remainder_encodeBlockAsm8B: // Emit everything from 12(SP) to src_len as one final literal; this part checks space and writes the header.
  4929. MOVQ src_len+32(FP), CX
  4930. SUBL 12(SP), CX // CX = src_len - 12(SP): bytes not yet emitted
  4931. LEAQ 4(AX)(CX*1), CX // CX = AX + remaining + 4
  4932. CMPQ CX, (SP)
  4933. JL emit_remainder_ok_encodeBlockAsm8B // below the limit kept in (SP)
  4934. MOVQ $0x00000000, ret+48(FP) // otherwise return 0
  4935. RET
  4936. emit_remainder_ok_encodeBlockAsm8B:
  4937. MOVQ src_len+32(FP), CX
  4938. MOVL 12(SP), BX // BX = index where the pending run starts
  4939. CMPL BX, CX
  4940. JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B // nothing pending
  4941. MOVL CX, BP
  4942. MOVL CX, 12(SP) // mark everything up to src_len as emitted
  4943. LEAQ (DX)(BX*1), CX // CX = DX + run start: first byte to copy
  4944. SUBL BX, BP // BP = run length in bytes, kept for the copy that follows
  4945. MOVL BP, DX // DX is reused as scratch from here on
  4946. SUBL $0x01, DX // DX = length-1, the value stored in the header
  4947. JC emit_literal_done_emit_remainder_encodeBlockAsm8B // borrow: length was 0
  4948. CMPL DX, $0x3c
  4949. JLT one_byte_emit_remainder_encodeBlockAsm8B // length-1 < 60: fits inside the tag byte
  4950. CMPL DX, $0x00000100
  4951. JLT two_bytes_emit_remainder_encodeBlockAsm8B // length-1 < 2^8: one extra byte
  4952. CMPL DX, $0x00010000
  4953. JLT three_bytes_emit_remainder_encodeBlockAsm8B // length-1 < 2^16: two extra bytes
  4954. CMPL DX, $0x01000000
  4955. JLT four_bytes_emit_remainder_encodeBlockAsm8B // length-1 < 2^24: three extra bytes
  4956. MOVB $0xfc, (AX) // tag 0xfc, followed by length-1 as 4 bytes
  4957. MOVL DX, 1(AX)
  4958. ADDQ $0x05, AX // header took 5 bytes
  4959. JMP memmove_emit_remainder_encodeBlockAsm8B
  4960. four_bytes_emit_remainder_encodeBlockAsm8B:
  4961. MOVL DX, BX
  4962. SHRL $0x10, BX // BL = bits 16..23 of length-1
  4963. MOVB $0xf8, (AX) // tag 0xf8, followed by length-1 as 3 bytes
  4964. MOVW DX, 1(AX) // low 16 bits
  4965. MOVB BL, 3(AX) // third byte
  4966. ADDQ $0x04, AX // header took 4 bytes
  4967. JMP memmove_emit_remainder_encodeBlockAsm8B
  4968. three_bytes_emit_remainder_encodeBlockAsm8B:
  4969. MOVB $0xf4, (AX) // tag 0xf4, followed by length-1 as 2 bytes
  4970. MOVW DX, 1(AX)
  4971. ADDQ $0x03, AX // header took 3 bytes
  4972. JMP memmove_emit_remainder_encodeBlockAsm8B
  4973. two_bytes_emit_remainder_encodeBlockAsm8B:
  4974. MOVB $0xf0, (AX) // tag 0xf0, followed by length-1 as 1 byte
  4975. MOVB DL, 1(AX)
  4976. ADDQ $0x02, AX // header took 2 bytes
  4977. JMP memmove_emit_remainder_encodeBlockAsm8B
  4978. one_byte_emit_remainder_encodeBlockAsm8B:
  4979. SHLB $0x02, DL // tag = (length-1)<<2; the low two bits stay zero
  4980. MOVB DL, (AX)
  4981. ADDQ $0x01, AX // header took 1 byte; falls through into the copy
  4982. memmove_emit_remainder_encodeBlockAsm8B:
  4983. LEAQ (AX)(BP*1), DX
  4984. MOVL BP, BX
  4985. NOP
  4986. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_tail:
  4987. TESTQ BX, BX
  4988. JEQ memmove_end_copy_emit_remainder_encodeBlockAsm8B
  4989. CMPQ BX, $0x02
  4990. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
  4991. CMPQ BX, $0x04
  4992. JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
  4993. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4
  4994. CMPQ BX, $0x08
  4995. JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_5through7
  4996. JE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8
  4997. CMPQ BX, $0x10
  4998. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_9through16
  4999. CMPQ BX, $0x20
  5000. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
  5001. CMPQ BX, $0x40
  5002. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
  5003. CMPQ BX, $0x80
  5004. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_65through128
  5005. CMPQ BX, $0x00000100
  5006. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_129through256
  5007. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_256through2048
  5008. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
  5009. MOVB (CX), BP
  5010. MOVB -1(CX)(BX*1), CL
  5011. MOVB BP, (AX)
  5012. MOVB CL, -1(AX)(BX*1)
  5013. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5014. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4:
  5015. MOVL (CX), BP
  5016. MOVL BP, (AX)
  5017. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5018. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
  5019. MOVW (CX), BP
  5020. MOVB 2(CX), CL
  5021. MOVW BP, (AX)
  5022. MOVB CL, 2(AX)
  5023. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5024. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_5through7:
  5025. MOVL (CX), BP
  5026. MOVL -4(CX)(BX*1), CX
  5027. MOVL BP, (AX)
  5028. MOVL CX, -4(AX)(BX*1)
  5029. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5030. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8:
  5031. MOVQ (CX), BP
  5032. MOVQ BP, (AX)
  5033. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5034. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_9through16:
  5035. MOVQ (CX), BP
  5036. MOVQ -8(CX)(BX*1), CX
  5037. MOVQ BP, (AX)
  5038. MOVQ CX, -8(AX)(BX*1)
  5039. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5040. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
  5041. MOVOU (CX), X0
  5042. MOVOU -16(CX)(BX*1), X1
  5043. MOVOU X0, (AX)
  5044. MOVOU X1, -16(AX)(BX*1)
  5045. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5046. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
  5047. MOVOU (CX), X0
  5048. MOVOU 16(CX), X1
  5049. MOVOU -32(CX)(BX*1), X2
  5050. MOVOU -16(CX)(BX*1), X3
  5051. MOVOU X0, (AX)
  5052. MOVOU X1, 16(AX)
  5053. MOVOU X2, -32(AX)(BX*1)
  5054. MOVOU X3, -16(AX)(BX*1)
  5055. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5056. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_65through128:
  5057. MOVOU (CX), X0
  5058. MOVOU 16(CX), X1
  5059. MOVOU 32(CX), X2
  5060. MOVOU 48(CX), X3
  5061. MOVOU -64(CX)(BX*1), X12
  5062. MOVOU -48(CX)(BX*1), X13
  5063. MOVOU -32(CX)(BX*1), X14
  5064. MOVOU -16(CX)(BX*1), X15
  5065. MOVOU X0, (AX)
  5066. MOVOU X1, 16(AX)
  5067. MOVOU X2, 32(AX)
  5068. MOVOU X3, 48(AX)
  5069. MOVOU X12, -64(AX)(BX*1)
  5070. MOVOU X13, -48(AX)(BX*1)
  5071. MOVOU X14, -32(AX)(BX*1)
  5072. MOVOU X15, -16(AX)(BX*1)
  5073. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5074. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_129through256:
  5075. MOVOU (CX), X0
  5076. MOVOU 16(CX), X1
  5077. MOVOU 32(CX), X2
  5078. MOVOU 48(CX), X3
  5079. MOVOU 64(CX), X4
  5080. MOVOU 80(CX), X5
  5081. MOVOU 96(CX), X6
  5082. MOVOU 112(CX), X7
  5083. MOVOU -128(CX)(BX*1), X8
  5084. MOVOU -112(CX)(BX*1), X9
  5085. MOVOU -96(CX)(BX*1), X10
  5086. MOVOU -80(CX)(BX*1), X11
  5087. MOVOU -64(CX)(BX*1), X12
  5088. MOVOU -48(CX)(BX*1), X13
  5089. MOVOU -32(CX)(BX*1), X14
  5090. MOVOU -16(CX)(BX*1), X15
  5091. MOVOU X0, (AX)
  5092. MOVOU X1, 16(AX)
  5093. MOVOU X2, 32(AX)
  5094. MOVOU X3, 48(AX)
  5095. MOVOU X4, 64(AX)
  5096. MOVOU X5, 80(AX)
  5097. MOVOU X6, 96(AX)
  5098. MOVOU X7, 112(AX)
  5099. MOVOU X8, -128(AX)(BX*1)
  5100. MOVOU X9, -112(AX)(BX*1)
  5101. MOVOU X10, -96(AX)(BX*1)
  5102. MOVOU X11, -80(AX)(BX*1)
  5103. MOVOU X12, -64(AX)(BX*1)
  5104. MOVOU X13, -48(AX)(BX*1)
  5105. MOVOU X14, -32(AX)(BX*1)
  5106. MOVOU X15, -16(AX)(BX*1)
  5107. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5108. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_256through2048:
  5109. LEAQ -256(BX), BX
  5110. MOVOU (CX), X0
  5111. MOVOU 16(CX), X1
  5112. MOVOU 32(CX), X2
  5113. MOVOU 48(CX), X3
  5114. MOVOU 64(CX), X4
  5115. MOVOU 80(CX), X5
  5116. MOVOU 96(CX), X6
  5117. MOVOU 112(CX), X7
  5118. MOVOU 128(CX), X8
  5119. MOVOU 144(CX), X9
  5120. MOVOU 160(CX), X10
  5121. MOVOU 176(CX), X11
  5122. MOVOU 192(CX), X12
  5123. MOVOU 208(CX), X13
  5124. MOVOU 224(CX), X14
  5125. MOVOU 240(CX), X15
  5126. MOVOU X0, (AX)
  5127. MOVOU X1, 16(AX)
  5128. MOVOU X2, 32(AX)
  5129. MOVOU X3, 48(AX)
  5130. MOVOU X4, 64(AX)
  5131. MOVOU X5, 80(AX)
  5132. MOVOU X6, 96(AX)
  5133. MOVOU X7, 112(AX)
  5134. MOVOU X8, 128(AX)
  5135. MOVOU X9, 144(AX)
  5136. MOVOU X10, 160(AX)
  5137. MOVOU X11, 176(AX)
  5138. MOVOU X12, 192(AX)
  5139. MOVOU X13, 208(AX)
  5140. MOVOU X14, 224(AX)
  5141. MOVOU X15, 240(AX)
  5142. CMPQ BX, $0x00000100
  5143. LEAQ 256(CX), CX
  5144. LEAQ 256(AX), AX
  5145. JGE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_256through2048
  5146. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_tail
  5147. memmove_end_copy_emit_remainder_encodeBlockAsm8B: // Final copy finished: compute the return value.
  5148. MOVQ DX, AX // AX = DX, the end address computed before the copy (header end + literal length)
  5149. emit_literal_done_emit_remainder_encodeBlockAsm8B:
  5150. MOVQ dst_base+0(FP), CX
  5151. SUBQ CX, AX // AX - dst_base = number of bytes written to dst
  5152. MOVQ AX, ret+48(FP) // return that count
  5153. RET
  5154. // func encodeBlockAsmAvx(dst []byte, src []byte) int
  5155. // Requires: AVX, SSE2
  5156. TEXT ·encodeBlockAsmAvx(SB), $65560-56 // frame: 65560 bytes of locals, 56 bytes of arguments + result
  5157. MOVQ dst_base+0(FP), AX // AX = dst_base
  5158. MOVQ $0x00000200, CX // 512 passes x 128 bytes = 65536 bytes to clear
  5159. LEAQ 24(SP), DX // DX = start of the area to clear (later indexed as a table of 4-byte entries)
  5160. PXOR X0, X0 // X0 = all zero
  5161. zero_loop_encodeBlockAsmAvx: // clear 128 bytes per pass
  5162. MOVOU X0, (DX)
  5163. MOVOU X0, 16(DX)
  5164. MOVOU X0, 32(DX)
  5165. MOVOU X0, 48(DX)
  5166. MOVOU X0, 64(DX)
  5167. MOVOU X0, 80(DX)
  5168. MOVOU X0, 96(DX)
  5169. MOVOU X0, 112(DX)
  5170. ADDQ $0x80, DX // next 128-byte slice
  5171. DECQ CX
  5172. JNZ zero_loop_encodeBlockAsmAvx
  5173. MOVL $0x00000000, 12(SP) // 12(SP) = 0: start of the not-yet-emitted run
  5174. MOVQ src_len+32(FP), CX
  5175. LEAQ -5(CX), DX // DX = src_len - 5
  5176. LEAQ -8(CX), BP
  5177. MOVL BP, 8(SP) // 8(SP) = src_len - 8: the search loop leaves for emit_remainder once its next position exceeds this
  5178. SHRQ $0x05, CX // CX = src_len / 32
  5179. SUBL CX, DX // DX = src_len - 5 - src_len/32
  5180. LEAQ (AX)(DX*1), DX
  5181. MOVQ DX, (SP) // (SP) = dst_base + src_len - 5 - src_len/32
  5182. MOVL $0x00000001, CX // CX = 1: first position examined by the search loop
  5183. MOVL CX, 16(SP) // 16(SP) = 1: initial value of the offset slot used by the repeat check
  5184. MOVQ src_base+24(FP), DX // DX = src_base for the rest of the function
  5185. search_loop_encodeBlockAsmAvx:
  5186. MOVQ (DX)(CX*1), SI
  5187. MOVL CX, BP
  5188. SUBL 12(SP), BP
  5189. SHRL $0x05, BP
  5190. LEAL 4(CX)(BP*1), BP
  5191. MOVL 8(SP), DI
  5192. CMPL BP, DI
  5193. JGT emit_remainder_encodeBlockAsmAvx
  5194. MOVL BP, 20(SP)
  5195. MOVQ $0x0000cf1bbcdcbf9b, R8
  5196. MOVQ SI, R9
  5197. MOVQ SI, R10
  5198. SHRQ $0x08, R10
  5199. SHLQ $0x10, R9
  5200. IMULQ R8, R9
  5201. SHRQ $0x32, R9
  5202. SHLQ $0x10, R10
  5203. IMULQ R8, R10
  5204. SHRQ $0x32, R10
  5205. MOVL 24(SP)(R9*4), BP
  5206. MOVL 24(SP)(R10*4), DI
  5207. MOVL CX, 24(SP)(R9*4)
  5208. LEAL 1(CX), R9
  5209. MOVL R9, 24(SP)(R10*4)
  5210. MOVQ SI, R9
  5211. SHRQ $0x10, R9
  5212. SHLQ $0x10, R9
  5213. IMULQ R8, R9
  5214. SHRQ $0x32, R9
  5215. MOVL CX, R8
  5216. SUBL 16(SP), R8
  5217. MOVL 1(DX)(R8*1), R10
  5218. MOVQ SI, R8
  5219. SHRQ $0x08, R8
  5220. CMPL R8, R10
  5221. JNE no_repeat_found_encodeBlockAsmAvx
  5222. LEAL 1(CX), SI
  5223. MOVL 12(SP), DI
  5224. MOVL SI, BP
  5225. SUBL 16(SP), BP
  5226. JZ repeat_extend_back_end_encodeBlockAsmAvx
  5227. repeat_extend_back_loop_encodeBlockAsmAvx:
  5228. CMPL SI, DI
  5229. JLE repeat_extend_back_end_encodeBlockAsmAvx
  5230. MOVB -1(DX)(BP*1), BL
  5231. MOVB -1(DX)(SI*1), R8
  5232. CMPB BL, R8
  5233. JNE repeat_extend_back_end_encodeBlockAsmAvx
  5234. LEAL -1(SI), SI
  5235. DECL BP
  5236. JNZ repeat_extend_back_loop_encodeBlockAsmAvx
  5237. repeat_extend_back_end_encodeBlockAsmAvx:
  5238. MOVL 12(SP), BP
  5239. CMPL BP, SI
  5240. JEQ emit_literal_done_repeat_emit_encodeBlockAsmAvx
  5241. MOVL SI, R8
  5242. MOVL SI, 12(SP)
  5243. LEAQ (DX)(BP*1), R9
  5244. SUBL BP, R8
  5245. MOVL R8, BP
  5246. SUBL $0x01, BP
  5247. JC emit_literal_done_repeat_emit_encodeBlockAsmAvx
  5248. CMPL BP, $0x3c
  5249. JLT one_byte_repeat_emit_encodeBlockAsmAvx
  5250. CMPL BP, $0x00000100
  5251. JLT two_bytes_repeat_emit_encodeBlockAsmAvx
  5252. CMPL BP, $0x00010000
  5253. JLT three_bytes_repeat_emit_encodeBlockAsmAvx
  5254. CMPL BP, $0x01000000
  5255. JLT four_bytes_repeat_emit_encodeBlockAsmAvx
  5256. MOVB $0xfc, (AX)
  5257. MOVL BP, 1(AX)
  5258. ADDQ $0x05, AX
  5259. JMP memmove_repeat_emit_encodeBlockAsmAvx
  5260. four_bytes_repeat_emit_encodeBlockAsmAvx:
  5261. MOVL BP, R10
  5262. SHRL $0x10, R10
  5263. MOVB $0xf8, (AX)
  5264. MOVW BP, 1(AX)
  5265. MOVB R10, 3(AX)
  5266. ADDQ $0x04, AX
  5267. JMP memmove_repeat_emit_encodeBlockAsmAvx
  5268. three_bytes_repeat_emit_encodeBlockAsmAvx:
  5269. MOVB $0xf4, (AX)
  5270. MOVW BP, 1(AX)
  5271. ADDQ $0x03, AX
  5272. JMP memmove_repeat_emit_encodeBlockAsmAvx
  5273. two_bytes_repeat_emit_encodeBlockAsmAvx:
  5274. MOVB $0xf0, (AX)
  5275. MOVB BP, 1(AX)
  5276. ADDQ $0x02, AX
  5277. JMP memmove_repeat_emit_encodeBlockAsmAvx
  5278. one_byte_repeat_emit_encodeBlockAsmAvx:
  5279. SHLB $0x02, BP
  5280. MOVB BP, (AX)
  5281. ADDQ $0x01, AX
  5282. memmove_repeat_emit_encodeBlockAsmAvx:
  5283. LEAQ (AX)(R8*1), BP
  5284. NOP
  5285. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail:
  5286. TESTQ R8, R8
  5287. JEQ memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5288. CMPQ R8, $0x02
  5289. JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2
  5290. CMPQ R8, $0x04
  5291. JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3
  5292. JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4
  5293. CMPQ R8, $0x08
  5294. JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7
  5295. JE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8
  5296. CMPQ R8, $0x10
  5297. JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16
  5298. CMPQ R8, $0x20
  5299. JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32
  5300. CMPQ R8, $0x40
  5301. JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64
  5302. CMPQ R8, $0x80
  5303. JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128
  5304. CMPQ R8, $0x00000100
  5305. JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256
  5306. JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned
  5307. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2:
  5308. MOVB (R9), R10
  5309. MOVB -1(R9)(R8*1), R11
  5310. MOVB R10, (AX)
  5311. MOVB R11, -1(AX)(R8*1)
  5312. JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5313. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4:
  5314. MOVL (R9), R10
  5315. MOVL R10, (AX)
  5316. JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5317. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3:
  5318. MOVW (R9), R10
  5319. MOVB 2(R9), R11
  5320. MOVW R10, (AX)
  5321. MOVB R11, 2(AX)
  5322. JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5323. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7:
  5324. MOVL (R9), R10
  5325. MOVL -4(R9)(R8*1), R11
  5326. MOVL R10, (AX)
  5327. MOVL R11, -4(AX)(R8*1)
  5328. JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5329. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8:
  5330. MOVQ (R9), R10
  5331. MOVQ R10, (AX)
  5332. JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5333. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16:
  5334. MOVQ (R9), R10
  5335. MOVQ -8(R9)(R8*1), R11
  5336. MOVQ R10, (AX)
  5337. MOVQ R11, -8(AX)(R8*1)
  5338. JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5339. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32:
  5340. MOVOU (R9), X0
  5341. MOVOU -16(R9)(R8*1), X1
  5342. MOVOU X0, (AX)
  5343. MOVOU X1, -16(AX)(R8*1)
  5344. JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5345. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64:
  5346. MOVOU (R9), X0
  5347. MOVOU 16(R9), X1
  5348. MOVOU -32(R9)(R8*1), X2
  5349. MOVOU -16(R9)(R8*1), X3
  5350. MOVOU X0, (AX)
  5351. MOVOU X1, 16(AX)
  5352. MOVOU X2, -32(AX)(R8*1)
  5353. MOVOU X3, -16(AX)(R8*1)
  5354. JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5355. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128:
  5356. MOVOU (R9), X0
  5357. MOVOU 16(R9), X1
  5358. MOVOU 32(R9), X2
  5359. MOVOU 48(R9), X3
  5360. MOVOU -64(R9)(R8*1), X12
  5361. MOVOU -48(R9)(R8*1), X13
  5362. MOVOU -32(R9)(R8*1), X14
  5363. MOVOU -16(R9)(R8*1), X15
  5364. MOVOU X0, (AX)
  5365. MOVOU X1, 16(AX)
  5366. MOVOU X2, 32(AX)
  5367. MOVOU X3, 48(AX)
  5368. MOVOU X12, -64(AX)(R8*1)
  5369. MOVOU X13, -48(AX)(R8*1)
  5370. MOVOU X14, -32(AX)(R8*1)
  5371. MOVOU X15, -16(AX)(R8*1)
  5372. JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5373. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256:
  5374. MOVOU (R9), X0
  5375. MOVOU 16(R9), X1
  5376. MOVOU 32(R9), X2
  5377. MOVOU 48(R9), X3
  5378. MOVOU 64(R9), X4
  5379. MOVOU 80(R9), X5
  5380. MOVOU 96(R9), X6
  5381. MOVOU 112(R9), X7
  5382. MOVOU -128(R9)(R8*1), X8
  5383. MOVOU -112(R9)(R8*1), X9
  5384. MOVOU -96(R9)(R8*1), X10
  5385. MOVOU -80(R9)(R8*1), X11
  5386. MOVOU -64(R9)(R8*1), X12
  5387. MOVOU -48(R9)(R8*1), X13
  5388. MOVOU -32(R9)(R8*1), X14
  5389. MOVOU -16(R9)(R8*1), X15
  5390. MOVOU X0, (AX)
  5391. MOVOU X1, 16(AX)
  5392. MOVOU X2, 32(AX)
  5393. MOVOU X3, 48(AX)
  5394. MOVOU X4, 64(AX)
  5395. MOVOU X5, 80(AX)
  5396. MOVOU X6, 96(AX)
  5397. MOVOU X7, 112(AX)
  5398. MOVOU X8, -128(AX)(R8*1)
  5399. MOVOU X9, -112(AX)(R8*1)
  5400. MOVOU X10, -96(AX)(R8*1)
  5401. MOVOU X11, -80(AX)(R8*1)
  5402. MOVOU X12, -64(AX)(R8*1)
  5403. MOVOU X13, -48(AX)(R8*1)
  5404. MOVOU X14, -32(AX)(R8*1)
  5405. MOVOU X15, -16(AX)(R8*1)
  5406. JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx
  5407. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048:
  5408. LEAQ -256(R8), R8
  5409. MOVOU (R9), X0
  5410. MOVOU 16(R9), X1
  5411. MOVOU 32(R9), X2
  5412. MOVOU 48(R9), X3
  5413. MOVOU 64(R9), X4
  5414. MOVOU 80(R9), X5
  5415. MOVOU 96(R9), X6
  5416. MOVOU 112(R9), X7
  5417. MOVOU 128(R9), X8
  5418. MOVOU 144(R9), X9
  5419. MOVOU 160(R9), X10
  5420. MOVOU 176(R9), X11
  5421. MOVOU 192(R9), X12
  5422. MOVOU 208(R9), X13
  5423. MOVOU 224(R9), X14
  5424. MOVOU 240(R9), X15
  5425. MOVOU X0, (AX)
  5426. MOVOU X1, 16(AX)
  5427. MOVOU X2, 32(AX)
  5428. MOVOU X3, 48(AX)
  5429. MOVOU X4, 64(AX)
  5430. MOVOU X5, 80(AX)
  5431. MOVOU X6, 96(AX)
  5432. MOVOU X7, 112(AX)
  5433. MOVOU X8, 128(AX)
  5434. MOVOU X9, 144(AX)
  5435. MOVOU X10, 160(AX)
  5436. MOVOU X11, 176(AX)
  5437. MOVOU X12, 192(AX)
  5438. MOVOU X13, 208(AX)
  5439. MOVOU X14, 224(AX)
  5440. MOVOU X15, 240(AX)
  5441. CMPQ R8, $0x00000100
  5442. LEAQ 256(R9), R9
  5443. LEAQ 256(AX), AX
  5444. JGE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048
  5445. JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail
  // AVX copy of R8 bytes from (R9) to (AX).
  // Plan: save the last 128 source bytes (X5-X12) and the first 32 (Y4) up front, move the
  // destination pointer up to a 32-byte boundary, stream 128 bytes per pass with aligned
  // stores, then write the saved head and tail over the unaligned edges.
  5446. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned:
  5447. LEAQ (R9)(R8*1), R11 // R11 = source end
  5448. MOVQ AX, R13 // R13 = original destination start
  5449. MOVOU -128(R11), X5
  5450. MOVOU -112(R11), X6
  5451. MOVQ $0x00000080, R10 // R10 = 128, the per-pass stride
  // NOTE(review): the mask is written as 0xffffffe0; confirm the assembler encodes it as a
  // sign-extended immediate (-32) so that only the low five bits of the pointer are cleared.
  5452. ANDQ $0xffffffe0, AX
  5453. ADDQ $0x20, AX // step to the next 32-byte boundary above the original start
  5454. MOVOU -96(R11), X7
  5455. MOVOU -80(R11), X8
  5456. MOVQ AX, R12
  5457. SUBQ R13, R12 // R12 = bytes between the original and the aligned destination
  5458. MOVOU -64(R11), X9
  5459. MOVOU -48(R11), X10
  5460. SUBQ R12, R8 // those head bytes are covered by Y4 later
  5461. MOVOU -32(R11), X11
  5462. MOVOU -16(R11), X12
  5463. VMOVDQU (R9), Y4 // first 32 source bytes
  5464. ADDQ R12, R9 // source pointer follows the destination adjustment
  5465. SUBQ R10, R8 // pre-subtract one stride for the loop test
  5466. emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop:
  5467. VMOVDQU (R9), Y0
  5468. VMOVDQU 32(R9), Y1
  5469. VMOVDQU 64(R9), Y2
  5470. VMOVDQU 96(R9), Y3
  5471. ADDQ R10, R9
  5472. VMOVDQA Y0, (AX) // aligned stores: AX is on a 32-byte boundary here
  5473. VMOVDQA Y1, 32(AX)
  5474. VMOVDQA Y2, 64(AX)
  5475. VMOVDQA Y3, 96(AX)
  5476. ADDQ R10, AX
  5477. SUBQ R10, R8
  5478. JA emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop // loop while the subtraction neither borrowed nor hit zero
  5479. ADDQ R10, R8 // undo the pre-subtracted stride
  5480. ADDQ AX, R8 // R8 = one past the last destination byte
  5481. VMOVDQU Y4, (R13) // write the saved head at the original start
  5482. VZEROUPPER // clear upper YMM halves before the SSE stores below
  5483. MOVOU X5, -128(R8) // write the saved last 128 bytes
  5484. MOVOU X6, -112(R8)
  5485. MOVOU X7, -96(R8)
  5486. MOVOU X8, -80(R8)
  5487. MOVOU X9, -64(R8)
  5488. MOVOU X10, -48(R8)
  5489. MOVOU X11, -32(R8)
  5490. MOVOU X12, -16(R8)
  // End of the copy on the repeat path, then setup for extending the match forward.
  5491. memmove_end_copy_repeat_emit_encodeBlockAsmAvx:
  5492. MOVQ BP, AX // AX = BP; BP is set before this excerpt (presumably the end of the copied bytes; TODO confirm)
  5493. emit_literal_done_repeat_emit_encodeBlockAsmAvx:
  5494. ADDL $0x05, CX // CX += 5
  5495. MOVL CX, BP
  5496. SUBL 16(SP), BP // BP = CX - 16(SP)
  5497. MOVQ src_len+32(FP), R8
  5498. SUBL CX, R8 // R8 = src_len - CX, the bytes left to compare
  5499. LEAQ (DX)(CX*1), R9 // R9 = DX + CX
  5500. LEAQ (DX)(BP*1), BP // BP = DX + (CX - 16(SP))
  5501. XORL R11, R11 // R11 = number of matching bytes found so far
  5502. CMPL R8, $0x08
  5503. JL matchlen_single_repeat_extend // fewer than 8 bytes left: compare byte by byte
  // Counts in R11 how many bytes at R9+R11 equal the bytes at BP+R11, with R8 bytes available.
  // Compares 8 bytes per pass while R8 >= 8, then one byte per pass.
  5504. matchlen_loopback_repeat_extend:
  5505. MOVQ (R9)(R11*1), R10
  5506. XORQ (BP)(R11*1), R10 // non-zero bits mark differing positions
  5507. TESTQ R10, R10
  5508. JZ matchlen_loop_repeat_extend // all 8 bytes equal
  5509. BSFQ R10, R10 // index of the lowest differing bit
  5510. SARQ $0x03, R10 // bit index / 8 = count of equal leading bytes
  5511. LEAL (R11)(R10*1), R11
  5512. JMP repeat_extend_forward_end_encodeBlockAsmAvx
  5513. matchlen_loop_repeat_extend:
  5514. LEAL -8(R8), R8 // 8 fewer bytes available
  5515. LEAL 8(R11), R11 // 8 more bytes matched
  5516. CMPL R8, $0x08
  5517. JGE matchlen_loopback_repeat_extend
  5518. matchlen_single_repeat_extend:
  5519. TESTL R8, R8
  5520. JZ repeat_extend_forward_end_encodeBlockAsmAvx // nothing left to compare
  5521. matchlen_single_loopback_repeat_extend:
  5522. MOVB (R9)(R11*1), R10
  5523. CMPB (BP)(R11*1), R10
  5524. JNE repeat_extend_forward_end_encodeBlockAsmAvx
  5525. LEAL 1(R11), R11
  5526. DECL R8
  5527. JNZ matchlen_single_loopback_repeat_extend
  // Forward extension finished; R11 holds the number of additional matching bytes.
  5528. repeat_extend_forward_end_encodeBlockAsmAvx:
  5529. ADDL R11, CX // advance CX past the matched bytes
  5530. MOVL CX, BP
  5531. SUBL SI, BP // BP = CX - SI; SI is set before this excerpt (presumably the match start; TODO confirm)
  5532. MOVL 16(SP), SI // SI = 16(SP); the code below writes SI as the offset bytes
  5533. TESTL DI, DI // DI is set before this excerpt
  5534. JZ repeat_as_copy_encodeBlockAsmAvx // DI == 0: emit as a copy instead
  // Emits a repeat code at (AX) for length BP, using SI as the offset; AX advances by the bytes written.
  // The encoding is 2, 3, 4 or 5 bytes depending on the length. Lengths too large for one
  // 5-byte code emit a maximal code and loop.
  5535. emit_repeat_again_match_repeat_encodeBlockAsmAvx:
  5536. MOVL BP, DI // DI = length
  5537. LEAL -4(BP), BP // BP = length - 4
  5538. CMPL DI, $0x08
  5539. JLE repeat_two_match_repeat_encodeBlockAsmAvx // length <= 8
  5540. CMPL DI, $0x0c
  5541. JGE cant_repeat_two_offset_match_repeat_encodeBlockAsmAvx // length >= 12
  5542. CMPL SI, $0x00000800
  5543. JLT repeat_two_offset_match_repeat_encodeBlockAsmAvx // length 9..11 and SI < 2048
  5544. cant_repeat_two_offset_match_repeat_encodeBlockAsmAvx:
  5545. CMPL BP, $0x00000104
  5546. JLT repeat_three_match_repeat_encodeBlockAsmAvx // BP < 260
  5547. CMPL BP, $0x00010100
  5548. JLT repeat_four_match_repeat_encodeBlockAsmAvx // BP < 65792
  5549. CMPL BP, $0x0100ffff
  5550. JLT repeat_five_match_repeat_encodeBlockAsmAvx
  5551. LEAL -16842747(BP), BP // BP -= 0x0100fffb; the remainder is emitted on the next pass
  5552. MOVW $0x001d, (AX) // bytes 1d 00
  5553. MOVW $0xfffb, 2(AX) // bytes fb ff
  5554. MOVB $0xff, 4(AX)
  5555. ADDQ $0x05, AX
  5556. JMP emit_repeat_again_match_repeat_encodeBlockAsmAvx
  5557. repeat_five_match_repeat_encodeBlockAsmAvx:
  5558. LEAL -65536(BP), BP
  5559. MOVL BP, SI
  5560. MOVW $0x001d, (AX) // bytes 1d 00
  5561. MOVW BP, 2(AX) // low 16 bits of BP
  5562. SARL $0x10, SI
  5563. MOVB SI, 4(AX) // bits 16..23 of BP
  5564. ADDQ $0x05, AX
  5565. JMP repeat_end_emit_encodeBlockAsmAvx
  5566. repeat_four_match_repeat_encodeBlockAsmAvx:
  5567. LEAL -256(BP), BP
  5568. MOVW $0x0019, (AX) // bytes 19 00
  5569. MOVW BP, 2(AX) // 16-bit BP
  5570. ADDQ $0x04, AX
  5571. JMP repeat_end_emit_encodeBlockAsmAvx
  5572. repeat_three_match_repeat_encodeBlockAsmAvx:
  5573. LEAL -4(BP), BP
  5574. MOVW $0x0015, (AX) // bytes 15 00
  5575. MOVB BP, 2(AX) // 8-bit BP
  5576. ADDQ $0x03, AX
  5577. JMP repeat_end_emit_encodeBlockAsmAvx
  5578. repeat_two_match_repeat_encodeBlockAsmAvx:
  5579. SHLL $0x02, BP
  5580. ORL $0x01, BP // BP = (BP << 2) | 1
  5581. MOVW BP, (AX) // stored as a 16-bit word
  5582. ADDQ $0x02, AX
  5583. JMP repeat_end_emit_encodeBlockAsmAvx
  5584. repeat_two_offset_match_repeat_encodeBlockAsmAvx:
  5585. XORQ DI, DI // DI = 0 so the LEAL base contributes nothing
  5586. LEAL 1(DI)(BP*4), BP // BP = BP*4 + 1
  5587. MOVB SI, 1(AX) // second byte = low 8 bits of SI
  5588. SARL $0x08, SI
  5589. SHLL $0x05, SI // bits 8 and up of SI move to bit 5 and up
  5590. ORL SI, BP
  5591. MOVB BP, (AX) // first byte
  5592. ADDQ $0x02, AX
  5593. JMP repeat_end_emit_encodeBlockAsmAvx
  // Emits the match as a copy: BP = length, SI = offset, AX = output pointer.
  // Offsets below 65536 use the two-byte-offset forms; larger ones use the 5-byte form here.
  5594. repeat_as_copy_encodeBlockAsmAvx:
  5595. CMPL SI, $0x00010000
  5596. JL two_byte_offset_repeat_as_copy_encodeBlockAsmAvx
  5597. four_bytes_loop_back_repeat_as_copy_encodeBlockAsmAvx:
  5598. CMPL BP, $0x40
  5599. JLE four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx // length <= 64 fits a single code
  5600. MOVB $0xff, (AX) // first byte 0xff
  5601. MOVL SI, 1(AX) // followed by the 32-bit SI
  5602. LEAL -64(BP), BP // 64 bytes of length accounted for
  5603. ADDQ $0x05, AX
  5604. CMPL BP, $0x04
  5605. JL four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx // fewer than 4 left; otherwise fall through to the repeat emitter
  // Emits a repeat code at (AX) for the remaining length BP, using SI as the offset.
  // The encoding is 2, 3, 4 or 5 bytes depending on the length. Lengths too large for one
  // 5-byte code emit a maximal code and loop.
  5606. emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
  5607. MOVL BP, DI // DI = length
  5608. LEAL -4(BP), BP // BP = length - 4
  5609. CMPL DI, $0x08
  5610. JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy // length <= 8
  5611. CMPL DI, $0x0c
  5612. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy // length >= 12
  5613. CMPL SI, $0x00000800
  5614. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy // length 9..11 and SI < 2048
  5615. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
  5616. CMPL BP, $0x00000104
  5617. JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy // BP < 260
  5618. CMPL BP, $0x00010100
  5619. JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy // BP < 65792
  5620. CMPL BP, $0x0100ffff
  5621. JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy
  5622. LEAL -16842747(BP), BP // BP -= 0x0100fffb; the remainder is emitted on the next pass
  5623. MOVW $0x001d, (AX) // bytes 1d 00
  5624. MOVW $0xfffb, 2(AX) // bytes fb ff
  5625. MOVB $0xff, 4(AX)
  5626. ADDQ $0x05, AX
  5627. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy
  5628. repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
  5629. LEAL -65536(BP), BP
  5630. MOVL BP, SI
  5631. MOVW $0x001d, (AX) // bytes 1d 00
  5632. MOVW BP, 2(AX) // low 16 bits of BP
  5633. SARL $0x10, SI
  5634. MOVB SI, 4(AX) // bits 16..23 of BP
  5635. ADDQ $0x05, AX
  5636. JMP repeat_end_emit_encodeBlockAsmAvx
  5637. repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
  5638. LEAL -256(BP), BP
  5639. MOVW $0x0019, (AX) // bytes 19 00
  5640. MOVW BP, 2(AX) // 16-bit BP
  5641. ADDQ $0x04, AX
  5642. JMP repeat_end_emit_encodeBlockAsmAvx
  5643. repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
  5644. LEAL -4(BP), BP
  5645. MOVW $0x0015, (AX) // bytes 15 00
  5646. MOVB BP, 2(AX) // 8-bit BP
  5647. ADDQ $0x03, AX
  5648. JMP repeat_end_emit_encodeBlockAsmAvx
  5649. repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
  5650. SHLL $0x02, BP
  5651. ORL $0x01, BP // BP = (BP << 2) | 1
  5652. MOVW BP, (AX) // stored as a 16-bit word
  5653. ADDQ $0x02, AX
  5654. JMP repeat_end_emit_encodeBlockAsmAvx
  5655. repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
  5656. XORQ DI, DI // DI = 0 so the LEAL base contributes nothing
  5657. LEAL 1(DI)(BP*4), BP // BP = BP*4 + 1
  5658. MOVB SI, 1(AX) // second byte = low 8 bits of SI
  5659. SARL $0x08, SI
  5660. SHLL $0x05, SI // bits 8 and up of SI move to bit 5 and up
  5661. ORL SI, BP
  5662. MOVB BP, (AX) // first byte
  5663. ADDQ $0x02, AX
  5664. JMP repeat_end_emit_encodeBlockAsmAvx
  5665. JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsmAvx // not reachable by fall-through: it follows an unconditional JMP and carries no label
  // Final 5-byte copy code: one byte built from the length BP, then the 32-bit offset SI.
  5666. four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx:
  5667. TESTL BP, BP
  5668. JZ repeat_end_emit_encodeBlockAsmAvx // nothing left to emit
  5669. MOVB $0x03, BL // low byte of BX = 3
  5670. LEAL -4(BX)(BP*4), BP // BP = BX + BP*4 - 4
  5671. MOVB BP, (AX) // only the low byte is stored, so the upper bits of BX do not reach the output
  5672. MOVL SI, 1(AX)
  5673. ADDQ $0x05, AX
  5674. JMP repeat_end_emit_encodeBlockAsmAvx
  // Copy with a 16-bit offset. Lengths above 64 emit one 3-byte code, subtract 60 from the
  // length, and fall through to the repeat emitter for the rest.
  5675. two_byte_offset_repeat_as_copy_encodeBlockAsmAvx:
  5676. CMPL BP, $0x40
  5677. JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx // length <= 64
  5678. MOVB $0xee, (AX) // first byte 0xee
  5679. MOVW SI, 1(AX) // 16-bit offset
  5680. LEAL -60(BP), BP // 60 bytes of length accounted for
  5681. ADDQ $0x03, AX
  // Emits a repeat code at (AX) for the remaining length BP, using SI as the offset.
  // The encoding is 2, 3, 4 or 5 bytes depending on the length. Lengths too large for one
  // 5-byte code emit a maximal code and loop.
  5682. emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
  5683. MOVL BP, DI // DI = length
  5684. LEAL -4(BP), BP // BP = length - 4
  5685. CMPL DI, $0x08
  5686. JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short // length <= 8
  5687. CMPL DI, $0x0c
  5688. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short // length >= 12
  5689. CMPL SI, $0x00000800
  5690. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short // length 9..11 and SI < 2048
  5691. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
  5692. CMPL BP, $0x00000104
  5693. JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short // BP < 260
  5694. CMPL BP, $0x00010100
  5695. JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short // BP < 65792
  5696. CMPL BP, $0x0100ffff
  5697. JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
  5698. LEAL -16842747(BP), BP // BP -= 0x0100fffb; the remainder is emitted on the next pass
  5699. MOVW $0x001d, (AX) // bytes 1d 00
  5700. MOVW $0xfffb, 2(AX) // bytes fb ff
  5701. MOVB $0xff, 4(AX)
  5702. ADDQ $0x05, AX
  5703. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
  5704. repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
  5705. LEAL -65536(BP), BP
  5706. MOVL BP, SI
  5707. MOVW $0x001d, (AX) // bytes 1d 00
  5708. MOVW BP, 2(AX) // low 16 bits of BP
  5709. SARL $0x10, SI
  5710. MOVB SI, 4(AX) // bits 16..23 of BP
  5711. ADDQ $0x05, AX
  5712. JMP repeat_end_emit_encodeBlockAsmAvx
  5713. repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
  5714. LEAL -256(BP), BP
  5715. MOVW $0x0019, (AX) // bytes 19 00
  5716. MOVW BP, 2(AX) // 16-bit BP
  5717. ADDQ $0x04, AX
  5718. JMP repeat_end_emit_encodeBlockAsmAvx
  5719. repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
  5720. LEAL -4(BP), BP
  5721. MOVW $0x0015, (AX) // bytes 15 00
  5722. MOVB BP, 2(AX) // 8-bit BP
  5723. ADDQ $0x03, AX
  5724. JMP repeat_end_emit_encodeBlockAsmAvx
  5725. repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
  5726. SHLL $0x02, BP
  5727. ORL $0x01, BP // BP = (BP << 2) | 1
  5728. MOVW BP, (AX) // stored as a 16-bit word
  5729. ADDQ $0x02, AX
  5730. JMP repeat_end_emit_encodeBlockAsmAvx
  5731. repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
  5732. XORQ DI, DI // DI = 0 so the LEAL base contributes nothing
  5733. LEAL 1(DI)(BP*4), BP // BP = BP*4 + 1
  5734. MOVB SI, 1(AX) // second byte = low 8 bits of SI
  5735. SARL $0x08, SI
  5736. SHLL $0x05, SI // bits 8 and up of SI move to bit 5 and up
  5737. ORL SI, BP
  5738. MOVB BP, (AX) // first byte
  5739. ADDQ $0x02, AX
  5740. JMP repeat_end_emit_encodeBlockAsmAvx
  5741. JMP two_byte_offset_repeat_as_copy_encodeBlockAsmAvx // not reachable by fall-through: it follows an unconditional JMP and carries no label
  // Copy of length BP <= 64 with offset SI < 65536.
  // Uses a 2-byte code when length < 12 and offset < 2048, otherwise a 3-byte code.
  5742. two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx:
  5743. CMPL BP, $0x0c
  5744. JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx
  5745. CMPL SI, $0x00000800
  5746. JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx
  5747. MOVB $0x01, BL // low byte of BX = 1
  5748. LEAL -16(BX)(BP*4), BP // BP = BX + BP*4 - 16
  5749. MOVB SI, 1(AX) // second byte = low 8 bits of the offset
  5750. SHRL $0x08, SI
  5751. SHLL $0x05, SI // offset bits 8 and up move to bit 5 and up
  5752. ORL SI, BP
  5753. MOVB BP, (AX) // only the low byte is stored, so the upper bits of BX do not reach the output
  5754. ADDQ $0x02, AX
  5755. JMP repeat_end_emit_encodeBlockAsmAvx
  5756. emit_copy_three_repeat_as_copy_encodeBlockAsmAvx:
  5757. MOVB $0x02, BL // low byte of BX = 2
  5758. LEAL -4(BX)(BP*4), BP // BP = BX + BP*4 - 4
  5759. MOVB BP, (AX) // first byte (low byte of BP only)
  5760. MOVW SI, 1(AX) // 16-bit offset
  5761. ADDQ $0x03, AX // falls through to the label that follows
  // Common exit of the repeat path: record the position and either finish or keep searching.
  5762. repeat_end_emit_encodeBlockAsmAvx:
  5763. MOVL CX, 12(SP) // 12(SP) = CX
  5764. CMPL CX, 8(SP)
  5765. JGE emit_remainder_encodeBlockAsmAvx // CX reached the limit held in 8(SP)
  5766. JMP search_loop_encodeBlockAsmAvx
  // Tests up to three candidate positions against the source bytes held in SI.
  // SI, BP, DI and R9 are set before this excerpt; presumably SI holds source bytes starting
  // at CX and BP/DI are candidate positions read from the table at 24(SP) (TODO confirm).
  5767. no_repeat_found_encodeBlockAsmAvx:
  5768. CMPL (DX)(BP*1), SI // 4 bytes at DX+BP against the low 32 bits of SI
  5769. JEQ candidate_match_encodeBlockAsmAvx
  5770. SHRQ $0x08, SI // drop the lowest byte of SI
  5771. MOVL 24(SP)(R9*4), BP // BP = 32-bit table entry at index R9
  5772. LEAL 2(CX), R8 // R8 = CX + 2
  5773. CMPL (DX)(DI*1), SI // 4 bytes at DX+DI against the shifted value
  5774. JEQ candidate2_match_encodeBlockAsmAvx
  5775. MOVL R8, 24(SP)(R9*4) // table[R9] = CX + 2
  5776. SHRQ $0x08, SI // drop another byte
  5777. CMPL (DX)(BP*1), SI
  5778. JEQ candidate3_match_encodeBlockAsmAvx
  5779. MOVL 20(SP), CX // no candidate matched: CX = 20(SP)
  5780. JMP search_loop_encodeBlockAsmAvx
  5781. candidate3_match_encodeBlockAsmAvx:
  5782. ADDL $0x02, CX // this candidate was compared after two one-byte shifts
  5783. JMP candidate_match_encodeBlockAsmAvx
  5784. candidate2_match_encodeBlockAsmAvx:
  5785. MOVL R8, 24(SP)(R9*4) // table[R9] = CX + 2 (computed before the increment below)
  5786. INCL CX // this candidate was compared after one one-byte shift
  5787. MOVL DI, BP // BP = the matching candidate; falls through to the label that follows
  // A 4-byte match was found with CX = current position and BP = candidate position.
  // Extends the match backwards while the preceding bytes are equal, CX stays above 12(SP)
  // and BP stays above 0. Then checks that the output buffer has room.
  5788. candidate_match_encodeBlockAsmAvx:
  5789. MOVL 12(SP), SI // SI = lower bound for CX
  5790. TESTL BP, BP
  5791. JZ match_extend_back_end_encodeBlockAsmAvx // candidate is at position 0: cannot extend back
  5792. match_extend_back_loop_encodeBlockAsmAvx:
  5793. CMPL CX, SI
  5794. JLE match_extend_back_end_encodeBlockAsmAvx
  5795. MOVB -1(DX)(BP*1), BL // byte before the candidate
  5796. MOVB -1(DX)(CX*1), DI // byte before the current position
  5797. CMPB BL, DI
  5798. JNE match_extend_back_end_encodeBlockAsmAvx
  5799. LEAL -1(CX), CX
  5800. DECL BP
  5801. JZ match_extend_back_end_encodeBlockAsmAvx // candidate reached position 0
  5802. JMP match_extend_back_loop_encodeBlockAsmAvx
  5803. match_extend_back_end_encodeBlockAsmAvx:
  5804. MOVL CX, SI
  5805. SUBL 12(SP), SI // SI = CX - 12(SP)
  5806. LEAQ 4(AX)(SI*1), SI // SI = AX + SI + 4
  5807. CMPQ SI, (SP)
  5808. JL match_dst_size_check_encodeBlockAsmAvx // below the limit stored at (SP): continue
  5809. MOVQ $0x00000000, ret+48(FP) // otherwise return 0
  5810. RET
  // Writes the header for the bytes between 12(SP) and CX, the literal run going by the label names.
  // The header is 1 to 5 bytes, chosen by length-1. On exit SI = source pointer,
  // R8 = length and AX points just past the header.
  5811. match_dst_size_check_encodeBlockAsmAvx:
  5812. MOVL CX, SI
  5813. MOVL 12(SP), DI
  5814. CMPL DI, SI
  5815. JEQ emit_literal_done_match_emit_encodeBlockAsmAvx // empty range: nothing to emit
  5816. MOVL SI, R8
  5817. MOVL SI, 12(SP) // 12(SP) = CX
  5818. LEAQ (DX)(DI*1), SI // SI = DX + previous 12(SP)
  5819. SUBL DI, R8 // R8 = length of the range
  5820. MOVL R8, DI
  5821. SUBL $0x01, DI // DI = length - 1
  5822. JC emit_literal_done_match_emit_encodeBlockAsmAvx // the subtraction borrowed: length was 0
  5823. CMPL DI, $0x3c
  5824. JLT one_byte_match_emit_encodeBlockAsmAvx // length-1 < 60
  5825. CMPL DI, $0x00000100
  5826. JLT two_bytes_match_emit_encodeBlockAsmAvx
  5827. CMPL DI, $0x00010000
  5828. JLT three_bytes_match_emit_encodeBlockAsmAvx
  5829. CMPL DI, $0x01000000
  5830. JLT four_bytes_match_emit_encodeBlockAsmAvx
  5831. MOVB $0xfc, (AX) // 5-byte header: 0xfc then 32-bit length-1
  5832. MOVL DI, 1(AX)
  5833. ADDQ $0x05, AX
  5834. JMP memmove_match_emit_encodeBlockAsmAvx
  5835. four_bytes_match_emit_encodeBlockAsmAvx:
  5836. MOVL DI, R9
  5837. SHRL $0x10, R9 // R9 = bits 16 and up of length-1
  5838. MOVB $0xf8, (AX) // 4-byte header: 0xf8 then 24-bit length-1
  5839. MOVW DI, 1(AX)
  5840. MOVB R9, 3(AX)
  5841. ADDQ $0x04, AX
  5842. JMP memmove_match_emit_encodeBlockAsmAvx
  5843. three_bytes_match_emit_encodeBlockAsmAvx:
  5844. MOVB $0xf4, (AX) // 3-byte header: 0xf4 then 16-bit length-1
  5845. MOVW DI, 1(AX)
  5846. ADDQ $0x03, AX
  5847. JMP memmove_match_emit_encodeBlockAsmAvx
  5848. two_bytes_match_emit_encodeBlockAsmAvx:
  5849. MOVB $0xf0, (AX) // 2-byte header: 0xf0 then 8-bit length-1
  5850. MOVB DI, 1(AX)
  5851. ADDQ $0x02, AX
  5852. JMP memmove_match_emit_encodeBlockAsmAvx
  5853. one_byte_match_emit_encodeBlockAsmAvx:
  5854. SHLB $0x02, DI // 1-byte header: (length-1) << 2
  5855. MOVB DI, (AX)
  5856. ADDQ $0x01, AX
  // Copies R8 bytes from (SI) to (AX). DI is set to AX + R8 first so the caller side can
  // restore AX to the end of the copy afterwards. The dispatch picks a routine by size.
  5857. memmove_match_emit_encodeBlockAsmAvx:
  5858. LEAQ (AX)(R8*1), DI // DI = destination end
  5859. NOP
  5860. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail:
  5861. TESTQ R8, R8
  5862. JEQ memmove_end_copy_match_emit_encodeBlockAsmAvx // length 0
  5863. CMPQ R8, $0x02
  5864. JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2
  5865. CMPQ R8, $0x04
  5866. JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3
  5867. JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4 // reuses the flags of the compare against 4: taken when equal
  5868. CMPQ R8, $0x08
  5869. JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7
  5870. JE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8
  5871. CMPQ R8, $0x10
  5872. JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16
  5873. CMPQ R8, $0x20
  5874. JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32
  5875. CMPQ R8, $0x40
  5876. JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64
  5877. CMPQ R8, $0x80
  5878. JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128
  5879. CMPQ R8, $0x00000100
  5880. JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256
  5881. JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned // more than 256 bytes
  // Fixed-shape copies of 1..16 bytes from (SI) to (AX), R8 = length.
  // Ranged sizes copy a head piece and a tail piece that may overlap; both loads come before both stores.
  5882. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2:
  5883. MOVB (SI), R9 // first byte
  5884. MOVB -1(SI)(R8*1), R10 // last byte (the same byte when R8 == 1)
  5885. MOVB R9, (AX)
  5886. MOVB R10, -1(AX)(R8*1)
  5887. JMP memmove_end_copy_match_emit_encodeBlockAsmAvx
  5888. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4:
  5889. MOVL (SI), R9
  5890. MOVL R9, (AX)
  5891. JMP memmove_end_copy_match_emit_encodeBlockAsmAvx
  5892. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3:
  5893. MOVW (SI), R9 // bytes 0..1
  5894. MOVB 2(SI), R10 // byte 2
  5895. MOVW R9, (AX)
  5896. MOVB R10, 2(AX)
  5897. JMP memmove_end_copy_match_emit_encodeBlockAsmAvx
  5898. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7:
  5899. MOVL (SI), R9 // first 4 bytes
  5900. MOVL -4(SI)(R8*1), R10 // last 4 bytes
  5901. MOVL R9, (AX)
  5902. MOVL R10, -4(AX)(R8*1)
  5903. JMP memmove_end_copy_match_emit_encodeBlockAsmAvx
  5904. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8:
  5905. MOVQ (SI), R9
  5906. MOVQ R9, (AX)
  5907. JMP memmove_end_copy_match_emit_encodeBlockAsmAvx
  5908. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16:
  5909. MOVQ (SI), R9 // first 8 bytes
  5910. MOVQ -8(SI)(R8*1), R10 // last 8 bytes
  5911. MOVQ R9, (AX)
  5912. MOVQ R10, -8(AX)(R8*1)
  5913. JMP memmove_end_copy_match_emit_encodeBlockAsmAvx
  // Copies of 17..128 bytes from (SI) to (AX), R8 = length, using 16-byte unaligned moves.
  // Each routine copies a fixed-size head and an equally sized tail addressed from the end,
  // and the two may overlap. All loads come before any store.
  5914. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32:
  5915. MOVOU (SI), X0 // first 16 bytes
  5916. MOVOU -16(SI)(R8*1), X1 // last 16 bytes
  5917. MOVOU X0, (AX)
  5918. MOVOU X1, -16(AX)(R8*1)
  5919. JMP memmove_end_copy_match_emit_encodeBlockAsmAvx
  5920. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64:
  5921. MOVOU (SI), X0 // first 32 bytes in X0-X1
  5922. MOVOU 16(SI), X1
  5923. MOVOU -32(SI)(R8*1), X2 // last 32 bytes in X2-X3
  5924. MOVOU -16(SI)(R8*1), X3
  5925. MOVOU X0, (AX)
  5926. MOVOU X1, 16(AX)
  5927. MOVOU X2, -32(AX)(R8*1)
  5928. MOVOU X3, -16(AX)(R8*1)
  5929. JMP memmove_end_copy_match_emit_encodeBlockAsmAvx
  5930. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128:
  5931. MOVOU (SI), X0 // first 64 bytes in X0-X3
  5932. MOVOU 16(SI), X1
  5933. MOVOU 32(SI), X2
  5934. MOVOU 48(SI), X3
  5935. MOVOU -64(SI)(R8*1), X12 // last 64 bytes in X12-X15
  5936. MOVOU -48(SI)(R8*1), X13
  5937. MOVOU -32(SI)(R8*1), X14
  5938. MOVOU -16(SI)(R8*1), X15
  5939. MOVOU X0, (AX)
  5940. MOVOU X1, 16(AX)
  5941. MOVOU X2, 32(AX)
  5942. MOVOU X3, 48(AX)
  5943. MOVOU X12, -64(AX)(R8*1)
  5944. MOVOU X13, -48(AX)(R8*1)
  5945. MOVOU X14, -32(AX)(R8*1)
  5946. MOVOU X15, -16(AX)(R8*1)
  5947. JMP memmove_end_copy_match_emit_encodeBlockAsmAvx
  // Copy of 129..256 bytes from (SI) to (AX), R8 = length.
  // X0-X7 carry the first 128 bytes and X8-X15 the last 128 (addressed from the end, so the
  // two halves may overlap). All loads come before any store.
  5948. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256:
  5949. MOVOU (SI), X0
  5950. MOVOU 16(SI), X1
  5951. MOVOU 32(SI), X2
  5952. MOVOU 48(SI), X3
  5953. MOVOU 64(SI), X4
  5954. MOVOU 80(SI), X5
  5955. MOVOU 96(SI), X6
  5956. MOVOU 112(SI), X7
  5957. MOVOU -128(SI)(R8*1), X8
  5958. MOVOU -112(SI)(R8*1), X9
  5959. MOVOU -96(SI)(R8*1), X10
  5960. MOVOU -80(SI)(R8*1), X11
  5961. MOVOU -64(SI)(R8*1), X12
  5962. MOVOU -48(SI)(R8*1), X13
  5963. MOVOU -32(SI)(R8*1), X14
  5964. MOVOU -16(SI)(R8*1), X15
  5965. MOVOU X0, (AX)
  5966. MOVOU X1, 16(AX)
  5967. MOVOU X2, 32(AX)
  5968. MOVOU X3, 48(AX)
  5969. MOVOU X4, 64(AX)
  5970. MOVOU X5, 80(AX)
  5971. MOVOU X6, 96(AX)
  5972. MOVOU X7, 112(AX)
  5973. MOVOU X8, -128(AX)(R8*1)
  5974. MOVOU X9, -112(AX)(R8*1)
  5975. MOVOU X10, -96(AX)(R8*1)
  5976. MOVOU X11, -80(AX)(R8*1)
  5977. MOVOU X12, -64(AX)(R8*1)
  5978. MOVOU X13, -48(AX)(R8*1)
  5979. MOVOU X14, -32(AX)(R8*1)
  5980. MOVOU X15, -16(AX)(R8*1)
  5981. JMP memmove_end_copy_match_emit_encodeBlockAsmAvx
  // Copies 256 bytes per pass from (SI) to (AX) through X0-X15; R8 is the byte count.
  // Every pass loads the full 256 bytes before it stores any of them.
  // The size dispatch at the _memmove_tail label sends lengths above 256 to the
  // _memmove_avxUnaligned routine and contains no branch to this label.
  5982. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048:
  5983. LEAQ -256(R8), R8 // R8 -= 256
  5984. MOVOU (SI), X0
  5985. MOVOU 16(SI), X1
  5986. MOVOU 32(SI), X2
  5987. MOVOU 48(SI), X3
  5988. MOVOU 64(SI), X4
  5989. MOVOU 80(SI), X5
  5990. MOVOU 96(SI), X6
  5991. MOVOU 112(SI), X7
  5992. MOVOU 128(SI), X8
  5993. MOVOU 144(SI), X9
  5994. MOVOU 160(SI), X10
  5995. MOVOU 176(SI), X11
  5996. MOVOU 192(SI), X12
  5997. MOVOU 208(SI), X13
  5998. MOVOU 224(SI), X14
  5999. MOVOU 240(SI), X15
  6000. MOVOU X0, (AX)
  6001. MOVOU X1, 16(AX)
  6002. MOVOU X2, 32(AX)
  6003. MOVOU X3, 48(AX)
  6004. MOVOU X4, 64(AX)
  6005. MOVOU X5, 80(AX)
  6006. MOVOU X6, 96(AX)
  6007. MOVOU X7, 112(AX)
  6008. MOVOU X8, 128(AX)
  6009. MOVOU X9, 144(AX)
  6010. MOVOU X10, 160(AX)
  6011. MOVOU X11, 176(AX)
  6012. MOVOU X12, 192(AX)
  6013. MOVOU X13, 208(AX)
  6014. MOVOU X14, 224(AX)
  6015. MOVOU X15, 240(AX)
  6016. CMPQ R8, $0x00000100 // sets the flags used by JGE below; the two LEAQs in between leave flags untouched
  6017. LEAQ 256(SI), SI // advance the source by 256
  6018. LEAQ 256(AX), AX // advance the destination by 256
  6019. JGE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048 // 256 or more bytes remain: copy another chunk
  6020. JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail // otherwise let the size dispatch finish the rest
  // AVX copy of R8 bytes (more than 256, per the dispatch that jumps here) from (SI) to (AX).
  // Plan: save the last 128 source bytes (X5-X12) and the first 32 (Y4) up front, move the
  // destination pointer up to a 32-byte boundary, stream 128 bytes per pass with aligned
  // stores, then write the saved head and tail over the unaligned edges.
  6021. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned:
  6022. LEAQ (SI)(R8*1), R10 // R10 = source end
  6023. MOVQ AX, R12 // R12 = original destination start
  6024. MOVOU -128(R10), X5
  6025. MOVOU -112(R10), X6
  6026. MOVQ $0x00000080, R9 // R9 = 128, the per-pass stride
  // NOTE(review): the mask is written as 0xffffffe0; confirm the assembler encodes it as a
  // sign-extended immediate (-32) so that only the low five bits of the pointer are cleared.
  6027. ANDQ $0xffffffe0, AX
  6028. ADDQ $0x20, AX // step to the next 32-byte boundary above the original start
  6029. MOVOU -96(R10), X7
  6030. MOVOU -80(R10), X8
  6031. MOVQ AX, R11
  6032. SUBQ R12, R11 // R11 = bytes between the original and the aligned destination
  6033. MOVOU -64(R10), X9
  6034. MOVOU -48(R10), X10
  6035. SUBQ R11, R8 // those head bytes are covered by Y4 later
  6036. MOVOU -32(R10), X11
  6037. MOVOU -16(R10), X12
  6038. VMOVDQU (SI), Y4 // first 32 source bytes
  6039. ADDQ R11, SI // source pointer follows the destination adjustment
  6040. SUBQ R9, R8 // pre-subtract one stride for the loop test
  6041. emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop:
  6042. VMOVDQU (SI), Y0
  6043. VMOVDQU 32(SI), Y1
  6044. VMOVDQU 64(SI), Y2
  6045. VMOVDQU 96(SI), Y3
  6046. ADDQ R9, SI
  6047. VMOVDQA Y0, (AX) // aligned stores: AX is on a 32-byte boundary here
  6048. VMOVDQA Y1, 32(AX)
  6049. VMOVDQA Y2, 64(AX)
  6050. VMOVDQA Y3, 96(AX)
  6051. ADDQ R9, AX
  6052. SUBQ R9, R8
  6053. JA emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop // loop while the subtraction neither borrowed nor hit zero
  6054. ADDQ R9, R8 // undo the pre-subtracted stride
  6055. ADDQ AX, R8 // R8 = one past the last destination byte
  6056. VMOVDQU Y4, (R12) // write the saved head at the original start
  6057. VZEROUPPER // clear upper YMM halves before the SSE stores below
  6058. MOVOU X5, -128(R8) // write the saved last 128 bytes
  6059. MOVOU X6, -112(R8)
  6060. MOVOU X7, -96(R8)
  6061. MOVOU X8, -80(R8)
  6062. MOVOU X9, -64(R8)
  6063. MOVOU X10, -48(R8)
  6064. MOVOU X11, -32(R8)
  6065. MOVOU X12, -16(R8)
  // End of the copy on the match path, then setup for measuring the match length.
  // CX = current position, BP = candidate position; the first 4 bytes are already known equal.
  6066. memmove_end_copy_match_emit_encodeBlockAsmAvx:
  6067. MOVQ DI, AX // AX = end of the bytes just copied (DI was set to AX + length before the copy)
  6068. emit_literal_done_match_emit_encodeBlockAsmAvx:
  6069. match_nolit_loop_encodeBlockAsmAvx:
  6070. MOVL CX, SI
  6071. SUBL BP, SI // SI = CX - BP, the match offset
  6072. MOVL SI, 16(SP) // remembered in 16(SP)
  6073. ADDL $0x04, CX // skip the 4 bytes already matched
  6074. ADDL $0x04, BP
  6075. MOVQ src_len+32(FP), SI
  6076. SUBL CX, SI // SI = src_len - CX, the bytes left to compare
  6077. LEAQ (DX)(CX*1), DI // DI = DX + CX
  6078. LEAQ (DX)(BP*1), BP // BP = DX + candidate position
  6079. XORL R9, R9 // R9 = number of further matching bytes
  6080. CMPL SI, $0x08
  6081. JL matchlen_single_match_nolit_encodeBlockAsmAvx // fewer than 8 bytes left: compare byte by byte
  // Counts in R9 how many bytes at DI+R9 equal the bytes at BP+R9, with SI bytes available.
  // Compares 8 bytes per pass while SI >= 8, then one byte per pass.
  6082. matchlen_loopback_match_nolit_encodeBlockAsmAvx:
  6083. MOVQ (DI)(R9*1), R8
  6084. XORQ (BP)(R9*1), R8 // non-zero bits mark differing positions
  6085. TESTQ R8, R8
  6086. JZ matchlen_loop_match_nolit_encodeBlockAsmAvx // all 8 bytes equal
  6087. BSFQ R8, R8 // index of the lowest differing bit
  6088. SARQ $0x03, R8 // bit index / 8 = count of equal leading bytes
  6089. LEAL (R9)(R8*1), R9
  6090. JMP match_nolit_end_encodeBlockAsmAvx
  6091. matchlen_loop_match_nolit_encodeBlockAsmAvx:
  6092. LEAL -8(SI), SI // 8 fewer bytes available
  6093. LEAL 8(R9), R9 // 8 more bytes matched
  6094. CMPL SI, $0x08
  6095. JGE matchlen_loopback_match_nolit_encodeBlockAsmAvx
  6096. matchlen_single_match_nolit_encodeBlockAsmAvx:
  6097. TESTL SI, SI
  6098. JZ match_nolit_end_encodeBlockAsmAvx // nothing left to compare
  6099. matchlen_single_loopback_match_nolit_encodeBlockAsmAvx:
  6100. MOVB (DI)(R9*1), R8
  6101. CMPB (BP)(R9*1), R8
  6102. JNE match_nolit_end_encodeBlockAsmAvx
  6103. LEAL 1(R9), R9
  6104. DECL SI
  6105. JNZ matchlen_single_loopback_match_nolit_encodeBlockAsmAvx
  // Match length is known: emit it as a copy with R9 = length, BP = offset, AX = output pointer.
  // Offsets below 65536 use the two-byte-offset forms; larger ones use the 5-byte form here.
  6106. match_nolit_end_encodeBlockAsmAvx:
  6107. ADDL R9, CX // advance CX past the extra matched bytes
  6108. MOVL 16(SP), BP // BP = match offset
  6109. ADDL $0x04, R9 // total length includes the first 4 matched bytes
  6110. CMPL BP, $0x00010000
  6111. JL two_byte_offset_match_nolit_encodeBlockAsmAvx
  6112. four_bytes_loop_back_match_nolit_encodeBlockAsmAvx:
  6113. CMPL R9, $0x40
  6114. JLE four_bytes_remain_match_nolit_encodeBlockAsmAvx // length <= 64 fits a single code
  6115. MOVB $0xff, (AX) // first byte 0xff
  6116. MOVL BP, 1(AX) // followed by the 32-bit offset
  6117. LEAL -64(R9), R9 // 64 bytes of length accounted for
  6118. ADDQ $0x05, AX
  6119. CMPL R9, $0x04
  6120. JL four_bytes_remain_match_nolit_encodeBlockAsmAvx // fewer than 4 left; otherwise fall through to the repeat emitter
  // Emits a repeat code at (AX) for the remaining length R9, using BP as the offset.
  // The encoding is 2, 3, 4 or 5 bytes depending on the length. Lengths too large for one
  // 5-byte code emit a maximal code and loop.
  6121. emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy:
  6122. MOVL R9, SI // SI = length
  6123. LEAL -4(R9), R9 // R9 = length - 4
  6124. CMPL SI, $0x08
  6125. JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy // length <= 8
  6126. CMPL SI, $0x0c
  6127. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy // length >= 12
  6128. CMPL BP, $0x00000800
  6129. JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy // length 9..11 and offset < 2048
  6130. cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy:
  6131. CMPL R9, $0x00000104
  6132. JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy // R9 < 260
  6133. CMPL R9, $0x00010100
  6134. JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy // R9 < 65792
  6135. CMPL R9, $0x0100ffff
  6136. JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy
  6137. LEAL -16842747(R9), R9 // R9 -= 0x0100fffb; the remainder is emitted on the next pass
  6138. MOVW $0x001d, (AX) // bytes 1d 00
  6139. MOVW $0xfffb, 2(AX) // bytes fb ff
  6140. MOVB $0xff, 4(AX)
  6141. ADDQ $0x05, AX
  6142. JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy
  6143. repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy:
  6144. LEAL -65536(R9), R9
  6145. MOVL R9, BP
  6146. MOVW $0x001d, (AX) // bytes 1d 00
  6147. MOVW R9, 2(AX) // low 16 bits of R9
  6148. SARL $0x10, BP
  6149. MOVB BP, 4(AX) // bits 16..23 of R9
  6150. ADDQ $0x05, AX
  6151. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6152. repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy:
  6153. LEAL -256(R9), R9
  6154. MOVW $0x0019, (AX) // bytes 19 00
  6155. MOVW R9, 2(AX) // 16-bit R9
  6156. ADDQ $0x04, AX
  6157. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6158. repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy:
  6159. LEAL -4(R9), R9
  6160. MOVW $0x0015, (AX) // bytes 15 00
  6161. MOVB R9, 2(AX) // 8-bit R9
  6162. ADDQ $0x03, AX
  6163. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6164. repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy:
  6165. SHLL $0x02, R9
  6166. ORL $0x01, R9 // R9 = (R9 << 2) | 1
  6167. MOVW R9, (AX) // stored as a 16-bit word
  6168. ADDQ $0x02, AX
  6169. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6170. repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy:
  6171. XORQ SI, SI // SI = 0 so the LEAL base contributes nothing
  6172. LEAL 1(SI)(R9*4), R9 // R9 = R9*4 + 1
  6173. MOVB BP, 1(AX) // second byte = low 8 bits of the offset
  6174. SARL $0x08, BP
  6175. SHLL $0x05, BP // offset bits 8 and up move to bit 5 and up
  6176. ORL BP, R9
  6177. MOVB R9, (AX) // first byte
  6178. ADDQ $0x02, AX
  6179. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6180. JMP four_bytes_loop_back_match_nolit_encodeBlockAsmAvx // not reachable by fall-through: it follows an unconditional JMP and carries no label
  // Final 5-byte copy code: one byte built from the length R9, then the 32-bit offset BP.
  6181. four_bytes_remain_match_nolit_encodeBlockAsmAvx:
  6182. TESTL R9, R9
  6183. JZ match_nolit_emitcopy_end_encodeBlockAsmAvx // nothing left to emit
  6184. MOVB $0x03, BL // low byte of BX = 3
  6185. LEAL -4(BX)(R9*4), R9 // R9 = BX + R9*4 - 4
  6186. MOVB R9, (AX) // only the low byte is stored, so the upper bits of BX do not reach the output
  6187. MOVL BP, 1(AX)
  6188. ADDQ $0x05, AX
  6189. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  // Copy with a 16-bit offset. Lengths above 64 emit one 3-byte code, subtract 60 from the
  // length, and fall through to the repeat emitter for the rest.
  6190. two_byte_offset_match_nolit_encodeBlockAsmAvx:
  6191. CMPL R9, $0x40
  6192. JLE two_byte_offset_short_match_nolit_encodeBlockAsmAvx // length <= 64
  6193. MOVB $0xee, (AX) // first byte 0xee
  6194. MOVW BP, 1(AX) // 16-bit offset
  6195. LEAL -60(R9), R9 // 60 bytes of length accounted for
  6196. ADDQ $0x03, AX
  // Emits a repeat code at (AX) for the remaining length R9, using BP as the offset.
  // The encoding is 2, 3, 4 or 5 bytes depending on the length. Lengths too large for one
  // 5-byte code emit a maximal code and loop.
  6197. emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short:
  6198. MOVL R9, SI // SI = length
  6199. LEAL -4(R9), R9 // R9 = length - 4
  6200. CMPL SI, $0x08
  6201. JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short // length <= 8
  6202. CMPL SI, $0x0c
  6203. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short // length >= 12
  6204. CMPL BP, $0x00000800
  6205. JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short // length 9..11 and offset < 2048
  6206. cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short:
  6207. CMPL R9, $0x00000104
  6208. JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short // R9 < 260
  6209. CMPL R9, $0x00010100
  6210. JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short // R9 < 65792
  6211. CMPL R9, $0x0100ffff
  6212. JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short
  6213. LEAL -16842747(R9), R9 // R9 -= 0x0100fffb; the remainder is emitted on the next pass
  6214. MOVW $0x001d, (AX) // bytes 1d 00
  6215. MOVW $0xfffb, 2(AX) // bytes fb ff
  6216. MOVB $0xff, 4(AX)
  6217. ADDQ $0x05, AX
  6218. JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short
  6219. repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short:
  6220. LEAL -65536(R9), R9
  6221. MOVL R9, BP
  6222. MOVW $0x001d, (AX) // bytes 1d 00
  6223. MOVW R9, 2(AX) // low 16 bits of R9
  6224. SARL $0x10, BP
  6225. MOVB BP, 4(AX) // bits 16..23 of R9
  6226. ADDQ $0x05, AX
  6227. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6228. repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short:
  6229. LEAL -256(R9), R9
  6230. MOVW $0x0019, (AX) // bytes 19 00
  6231. MOVW R9, 2(AX) // 16-bit R9
  6232. ADDQ $0x04, AX
  6233. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6234. repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short:
  6235. LEAL -4(R9), R9
  6236. MOVW $0x0015, (AX) // bytes 15 00
  6237. MOVB R9, 2(AX) // 8-bit R9
  6238. ADDQ $0x03, AX
  6239. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6240. repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short:
  6241. SHLL $0x02, R9
  6242. ORL $0x01, R9 // R9 = (R9 << 2) | 1
  6243. MOVW R9, (AX) // stored as a 16-bit word
  6244. ADDQ $0x02, AX
  6245. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6246. repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short:
  6247. XORQ SI, SI // SI = 0 so the LEAL base contributes nothing
  6248. LEAL 1(SI)(R9*4), R9 // R9 = R9*4 + 1
  6249. MOVB BP, 1(AX) // second byte = low 8 bits of the offset
  6250. SARL $0x08, BP
  6251. SHLL $0x05, BP // offset bits 8 and up move to bit 5 and up
  6252. ORL BP, R9
  6253. MOVB R9, (AX) // first byte
  6254. ADDQ $0x02, AX
  6255. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6256. JMP two_byte_offset_match_nolit_encodeBlockAsmAvx // not reachable by fall-through: it follows an unconditional JMP and carries no label
  // Copy of length R9 <= 64 with offset BP < 65536.
  // Uses a 2-byte code when length < 12 and offset < 2048, otherwise a 3-byte code.
  6257. two_byte_offset_short_match_nolit_encodeBlockAsmAvx:
  6258. CMPL R9, $0x0c
  6259. JGE emit_copy_three_match_nolit_encodeBlockAsmAvx
  6260. CMPL BP, $0x00000800
  6261. JGE emit_copy_three_match_nolit_encodeBlockAsmAvx
  6262. MOVB $0x01, BL // low byte of BX = 1
  6263. LEAL -16(BX)(R9*4), R9 // R9 = BX + R9*4 - 16
  6264. MOVB BP, 1(AX) // second byte = low 8 bits of the offset
  6265. SHRL $0x08, BP
  6266. SHLL $0x05, BP // offset bits 8 and up move to bit 5 and up
  6267. ORL BP, R9
  6268. MOVB R9, (AX) // only the low byte is stored, so the upper bits of BX do not reach the output
  6269. ADDQ $0x02, AX
  6270. JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
  6271. emit_copy_three_match_nolit_encodeBlockAsmAvx:
  6272. MOVB $0x02, BL // low byte of BX = 2
  6273. LEAL -4(BX)(R9*4), R9 // R9 = BX + R9*4 - 4
  6274. MOVB R9, (AX) // first byte (low byte of R9 only)
  6275. MOVW BP, 1(AX) // 16-bit offset
  6276. ADDQ $0x03, AX // falls through to the label that follows
  // Copy emitted: record the new position, stop if the position limit is reached, and
  // return 0 if the output pointer has reached the limit stored at (SP).
  6277. match_nolit_emitcopy_end_encodeBlockAsmAvx:
  6278. MOVL CX, 12(SP) // 12(SP) = CX
  6279. CMPL CX, 8(SP)
  6280. JGE emit_remainder_encodeBlockAsmAvx // CX reached the limit held in 8(SP)
  6281. CMPQ AX, (SP)
  6282. JL match_nolit_dst_ok_encodeBlockAsmAvx
  6283. MOVQ $0x00000000, ret+48(FP) // output limit reached: return 0
  6284. RET
  // Hashes the bytes at CX-2 and at CX, stores both positions in the table at 24(SP), and
  // checks whether the previous table entry for CX is an immediate match.
  // Hash: (value << 16) * 0xcf1bbcdcbf9b >> 50. The shift left by 16 discards the top two
  // bytes of the 8-byte load, and the shift right by 50 leaves a 14-bit table index.
  6285. match_nolit_dst_ok_encodeBlockAsmAvx:
  6286. MOVQ -2(DX)(CX*1), SI // 8 source bytes starting at CX-2
  6287. MOVQ $0x0000cf1bbcdcbf9b, BP // hash multiplier
  6288. MOVQ SI, DI // DI = bytes from CX-2
  6289. SHRQ $0x10, SI // SI = bytes from CX
  6290. MOVQ SI, R8
  6291. SHLQ $0x10, DI
  6292. IMULQ BP, DI
  6293. SHRQ $0x32, DI // DI = table index for CX-2
  6294. SHLQ $0x10, R8
  6295. IMULQ BP, R8
  6296. SHRQ $0x32, R8 // R8 = table index for CX
  6297. LEAL -2(CX), R9
  6298. MOVL 24(SP)(R8*4), BP // BP = previous entry for the CX hash (the candidate)
  6299. MOVL R9, 24(SP)(DI*4) // table[hash(CX-2)] = CX-2
  6300. MOVL CX, 24(SP)(R8*4) // table[hash(CX)] = CX
  6301. CMPL (DX)(BP*1), SI // 4 bytes at the candidate against the 4 bytes at CX
  6302. JEQ match_nolit_loop_encodeBlockAsmAvx // immediate match: emit another copy without literals
  6303. INCL CX
  6304. JMP search_loop_encodeBlockAsmAvx
  // Checks that the bytes from 12(SP) to src_len fit in the output before emitting them.
  6305. emit_remainder_encodeBlockAsmAvx:
  6306. MOVQ src_len+32(FP), CX
  6307. SUBL 12(SP), CX // CX = src_len - 12(SP), the number of remaining bytes
  6308. LEAQ 4(AX)(CX*1), CX // CX = AX + remaining + 4
  6309. CMPQ CX, (SP)
  6310. JL emit_remainder_ok_encodeBlockAsmAvx // below the limit stored at (SP)
  6311. MOVQ $0x00000000, ret+48(FP) // otherwise return 0
  6312. RET
  // Writes the header for the final bytes from 12(SP) to src_len, the literal run going by the label names.
  // The header is 1 to 5 bytes, chosen by length-1. On exit CX = source pointer,
  // BP = length and AX points just past the header. DX is reused as scratch from here on.
  6313. emit_remainder_ok_encodeBlockAsmAvx:
  6314. MOVQ src_len+32(FP), CX
  6315. MOVL 12(SP), BX
  6316. CMPL BX, CX
  6317. JEQ emit_literal_done_emit_remainder_encodeBlockAsmAvx // nothing remains
  6318. MOVL CX, BP
  6319. MOVL CX, 12(SP) // 12(SP) = src_len
  6320. LEAQ (DX)(BX*1), CX // CX = DX + previous 12(SP), the source pointer
  6321. SUBL BX, BP // BP = length
  6322. MOVL BP, DX
  6323. SUBL $0x01, DX // DX = length - 1
  6324. JC emit_literal_done_emit_remainder_encodeBlockAsmAvx // the subtraction borrowed: length was 0
  6325. CMPL DX, $0x3c
  6326. JLT one_byte_emit_remainder_encodeBlockAsmAvx // length-1 < 60
  6327. CMPL DX, $0x00000100
  6328. JLT two_bytes_emit_remainder_encodeBlockAsmAvx
  6329. CMPL DX, $0x00010000
  6330. JLT three_bytes_emit_remainder_encodeBlockAsmAvx
  6331. CMPL DX, $0x01000000
  6332. JLT four_bytes_emit_remainder_encodeBlockAsmAvx
  6333. MOVB $0xfc, (AX) // 5-byte header: 0xfc then 32-bit length-1
  6334. MOVL DX, 1(AX)
  6335. ADDQ $0x05, AX
  6336. JMP memmove_emit_remainder_encodeBlockAsmAvx
  6337. four_bytes_emit_remainder_encodeBlockAsmAvx:
  6338. MOVL DX, BX
  6339. SHRL $0x10, BX // BX = bits 16 and up of length-1
  6340. MOVB $0xf8, (AX) // 4-byte header: 0xf8 then 24-bit length-1
  6341. MOVW DX, 1(AX)
  6342. MOVB BL, 3(AX)
  6343. ADDQ $0x04, AX
  6344. JMP memmove_emit_remainder_encodeBlockAsmAvx
  6345. three_bytes_emit_remainder_encodeBlockAsmAvx:
  6346. MOVB $0xf4, (AX) // 3-byte header: 0xf4 then 16-bit length-1
  6347. MOVW DX, 1(AX)
  6348. ADDQ $0x03, AX
  6349. JMP memmove_emit_remainder_encodeBlockAsmAvx
  6350. two_bytes_emit_remainder_encodeBlockAsmAvx:
  6351. MOVB $0xf0, (AX) // 2-byte header: 0xf0 then 8-bit length-1
  6352. MOVB DL, 1(AX)
  6353. ADDQ $0x02, AX
  6354. JMP memmove_emit_remainder_encodeBlockAsmAvx
  6355. one_byte_emit_remainder_encodeBlockAsmAvx:
  6356. SHLB $0x02, DL // 1-byte header: (length-1) << 2
  6357. MOVB DL, (AX)
  6358. ADDQ $0x01, AX
  // Copies BP bytes from (CX) to (AX). DX is set to AX + BP (the destination end) and BX
  // takes the length for the size dispatch below.
  6359. memmove_emit_remainder_encodeBlockAsmAvx:
  6360. LEAQ (AX)(BP*1), DX // DX = destination end
  6361. MOVL BP, BX // BX = length (the 32-bit move zero-extends)
  6362. NOP
  6363. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail:
  6364. TESTQ BX, BX
  6365. JEQ memmove_end_copy_emit_remainder_encodeBlockAsmAvx // length 0
  6366. CMPQ BX, $0x02
  6367. JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2
  6368. CMPQ BX, $0x04
  6369. JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3
  6370. JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4 // reuses the flags of the compare against 4: taken when equal
  6371. CMPQ BX, $0x08
  6372. JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7
  6373. JE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8
  6374. CMPQ BX, $0x10
  6375. JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16
  6376. CMPQ BX, $0x20
  6377. JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32
  6378. CMPQ BX, $0x40
  6379. JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64
  6380. CMPQ BX, $0x80
  6381. JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128
  6382. CMPQ BX, $0x00000100
  6383. JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256
  6384. JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned // more than 256 bytes
  6385. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2:
  6386. MOVB (CX), BP
  6387. MOVB -1(CX)(BX*1), SI
  6388. MOVB BP, (AX)
  6389. MOVB SI, -1(AX)(BX*1)
  6390. JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx
  6391. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4:
  6392. MOVL (CX), BP
  6393. MOVL BP, (AX)
  6394. JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx
  6395. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3:
  6396. MOVW (CX), BP
  6397. MOVB 2(CX), SI
  6398. MOVW BP, (AX)
  6399. MOVB SI, 2(AX)
  6400. JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx
  6401. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7:
  6402. MOVL (CX), BP
  6403. MOVL -4(CX)(BX*1), SI
  6404. MOVL BP, (AX)
  6405. MOVL SI, -4(AX)(BX*1)
  6406. JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx
  6407. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8:
  6408. MOVQ (CX), BP
  6409. MOVQ BP, (AX)
  6410. JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx
  6411. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16:
  6412. MOVQ (CX), BP
  6413. MOVQ -8(CX)(BX*1), SI
  6414. MOVQ BP, (AX)
  6415. MOVQ SI, -8(AX)(BX*1)
  6416. JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx
  6417. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32:
  6418. MOVOU (CX), X0
  6419. MOVOU -16(CX)(BX*1), X1
  6420. MOVOU X0, (AX)
  6421. MOVOU X1, -16(AX)(BX*1)
  6422. JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx
  6423. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64:
  6424. MOVOU (CX), X0
  6425. MOVOU 16(CX), X1
  6426. MOVOU -32(CX)(BX*1), X2
  6427. MOVOU -16(CX)(BX*1), X3
  6428. MOVOU X0, (AX)
  6429. MOVOU X1, 16(AX)
  6430. MOVOU X2, -32(AX)(BX*1)
  6431. MOVOU X3, -16(AX)(BX*1)
  6432. JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx
  6433. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128:
  6434. MOVOU (CX), X0
  6435. MOVOU 16(CX), X1
  6436. MOVOU 32(CX), X2
  6437. MOVOU 48(CX), X3
  6438. MOVOU -64(CX)(BX*1), X12
  6439. MOVOU -48(CX)(BX*1), X13
  6440. MOVOU -32(CX)(BX*1), X14
  6441. MOVOU -16(CX)(BX*1), X15
  6442. MOVOU X0, (AX)
  6443. MOVOU X1, 16(AX)
  6444. MOVOU X2, 32(AX)
  6445. MOVOU X3, 48(AX)
  6446. MOVOU X12, -64(AX)(BX*1)
  6447. MOVOU X13, -48(AX)(BX*1)
  6448. MOVOU X14, -32(AX)(BX*1)
  6449. MOVOU X15, -16(AX)(BX*1)
  6450. JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx
  6451. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256:
  6452. MOVOU (CX), X0
  6453. MOVOU 16(CX), X1
  6454. MOVOU 32(CX), X2
  6455. MOVOU 48(CX), X3
  6456. MOVOU 64(CX), X4
  6457. MOVOU 80(CX), X5
  6458. MOVOU 96(CX), X6
  6459. MOVOU 112(CX), X7
  6460. MOVOU -128(CX)(BX*1), X8
  6461. MOVOU -112(CX)(BX*1), X9
  6462. MOVOU -96(CX)(BX*1), X10
  6463. MOVOU -80(CX)(BX*1), X11
  6464. MOVOU -64(CX)(BX*1), X12
  6465. MOVOU -48(CX)(BX*1), X13
  6466. MOVOU -32(CX)(BX*1), X14
  6467. MOVOU -16(CX)(BX*1), X15
  6468. MOVOU X0, (AX)
  6469. MOVOU X1, 16(AX)
  6470. MOVOU X2, 32(AX)
  6471. MOVOU X3, 48(AX)
  6472. MOVOU X4, 64(AX)
  6473. MOVOU X5, 80(AX)
  6474. MOVOU X6, 96(AX)
  6475. MOVOU X7, 112(AX)
  6476. MOVOU X8, -128(AX)(BX*1)
  6477. MOVOU X9, -112(AX)(BX*1)
  6478. MOVOU X10, -96(AX)(BX*1)
  6479. MOVOU X11, -80(AX)(BX*1)
  6480. MOVOU X12, -64(AX)(BX*1)
  6481. MOVOU X13, -48(AX)(BX*1)
  6482. MOVOU X14, -32(AX)(BX*1)
  6483. MOVOU X15, -16(AX)(BX*1)
  6484. JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx
  6485. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048:
  6486. LEAQ -256(BX), BX
  6487. MOVOU (CX), X0
  6488. MOVOU 16(CX), X1
  6489. MOVOU 32(CX), X2
  6490. MOVOU 48(CX), X3
  6491. MOVOU 64(CX), X4
  6492. MOVOU 80(CX), X5
  6493. MOVOU 96(CX), X6
  6494. MOVOU 112(CX), X7
  6495. MOVOU 128(CX), X8
  6496. MOVOU 144(CX), X9
  6497. MOVOU 160(CX), X10
  6498. MOVOU 176(CX), X11
  6499. MOVOU 192(CX), X12
  6500. MOVOU 208(CX), X13
  6501. MOVOU 224(CX), X14
  6502. MOVOU 240(CX), X15
  6503. MOVOU X0, (AX)
  6504. MOVOU X1, 16(AX)
  6505. MOVOU X2, 32(AX)
  6506. MOVOU X3, 48(AX)
  6507. MOVOU X4, 64(AX)
  6508. MOVOU X5, 80(AX)
  6509. MOVOU X6, 96(AX)
  6510. MOVOU X7, 112(AX)
  6511. MOVOU X8, 128(AX)
  6512. MOVOU X9, 144(AX)
  6513. MOVOU X10, 160(AX)
  6514. MOVOU X11, 176(AX)
  6515. MOVOU X12, 192(AX)
  6516. MOVOU X13, 208(AX)
  6517. MOVOU X14, 224(AX)
  6518. MOVOU X15, 240(AX)
  6519. CMPQ BX, $0x00000100
  6520. LEAQ 256(CX), CX
  6521. LEAQ 256(AX), AX
  6522. JGE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048
  6523. JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail
  6524. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned:
  6525. LEAQ (CX)(BX*1), SI
  6526. MOVQ AX, R8
  6527. MOVOU -128(SI), X5
  6528. MOVOU -112(SI), X6
  6529. MOVQ $0x00000080, BP
  6530. ANDQ $0xffffffe0, AX
  6531. ADDQ $0x20, AX
  6532. MOVOU -96(SI), X7
  6533. MOVOU -80(SI), X8
  6534. MOVQ AX, DI
  6535. SUBQ R8, DI
  6536. MOVOU -64(SI), X9
  6537. MOVOU -48(SI), X10
  6538. SUBQ DI, BX
  6539. MOVOU -32(SI), X11
  6540. MOVOU -16(SI), X12
  6541. VMOVDQU (CX), Y4
  6542. ADDQ DI, CX
  6543. SUBQ BP, BX
  6544. emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop:
  6545. VMOVDQU (CX), Y0
  6546. VMOVDQU 32(CX), Y1
  6547. VMOVDQU 64(CX), Y2
  6548. VMOVDQU 96(CX), Y3
  6549. ADDQ BP, CX
  6550. VMOVDQA Y0, (AX)
  6551. VMOVDQA Y1, 32(AX)
  6552. VMOVDQA Y2, 64(AX)
  6553. VMOVDQA Y3, 96(AX)
  6554. ADDQ BP, AX
  6555. SUBQ BP, BX
  6556. JA emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop
  6557. ADDQ BP, BX
  6558. ADDQ AX, BX
  6559. VMOVDQU Y4, (R8)
  6560. VZEROUPPER
  6561. MOVOU X5, -128(BX)
  6562. MOVOU X6, -112(BX)
  6563. MOVOU X7, -96(BX)
  6564. MOVOU X8, -80(BX)
  6565. MOVOU X9, -64(BX)
  6566. MOVOU X10, -48(BX)
  6567. MOVOU X11, -32(BX)
  6568. MOVOU X12, -16(BX)
  6569. memmove_end_copy_emit_remainder_encodeBlockAsmAvx:
  6570. MOVQ DX, AX
  6571. emit_literal_done_emit_remainder_encodeBlockAsmAvx:
  6572. MOVQ dst_base+0(FP), CX
  6573. SUBQ CX, AX
  6574. MOVQ AX, ret+48(FP)
  6575. RET
  6576. // func encodeBlockAsm12BAvx(dst []byte, src []byte) int
  6577. // Requires: AVX, SSE2
  6578. TEXT ·encodeBlockAsm12BAvx(SB), $16408-56
  6579. MOVQ dst_base+0(FP), AX
  6580. MOVQ $0x00000080, CX
  6581. LEAQ 24(SP), DX
  6582. PXOR X0, X0
  6583. zero_loop_encodeBlockAsm12BAvx:
  6584. MOVOU X0, (DX)
  6585. MOVOU X0, 16(DX)
  6586. MOVOU X0, 32(DX)
  6587. MOVOU X0, 48(DX)
  6588. MOVOU X0, 64(DX)
  6589. MOVOU X0, 80(DX)
  6590. MOVOU X0, 96(DX)
  6591. MOVOU X0, 112(DX)
  6592. ADDQ $0x80, DX
  6593. DECQ CX
  6594. JNZ zero_loop_encodeBlockAsm12BAvx
  6595. MOVL $0x00000000, 12(SP)
  6596. MOVQ src_len+32(FP), CX
  6597. LEAQ -5(CX), DX
  6598. LEAQ -8(CX), BP
  6599. MOVL BP, 8(SP)
  6600. SHRQ $0x05, CX
  6601. SUBL CX, DX
  6602. LEAQ (AX)(DX*1), DX
  6603. MOVQ DX, (SP)
  6604. MOVL $0x00000001, CX
  6605. MOVL CX, 16(SP)
  6606. MOVQ src_base+24(FP), DX
  6607. search_loop_encodeBlockAsm12BAvx:
  6608. MOVQ (DX)(CX*1), SI
  6609. MOVL CX, BP
  6610. SUBL 12(SP), BP
  6611. SHRL $0x05, BP
  6612. LEAL 4(CX)(BP*1), BP
  6613. MOVL 8(SP), DI
  6614. CMPL BP, DI
  6615. JGT emit_remainder_encodeBlockAsm12BAvx
  6616. MOVL BP, 20(SP)
  6617. MOVQ $0x000000cf1bbcdcbb, R8
  6618. MOVQ SI, R9
  6619. MOVQ SI, R10
  6620. SHRQ $0x08, R10
  6621. SHLQ $0x18, R9
  6622. IMULQ R8, R9
  6623. SHRQ $0x34, R9
  6624. SHLQ $0x18, R10
  6625. IMULQ R8, R10
  6626. SHRQ $0x34, R10
  6627. MOVL 24(SP)(R9*4), BP
  6628. MOVL 24(SP)(R10*4), DI
  6629. MOVL CX, 24(SP)(R9*4)
  6630. LEAL 1(CX), R9
  6631. MOVL R9, 24(SP)(R10*4)
  6632. MOVQ SI, R9
  6633. SHRQ $0x10, R9
  6634. SHLQ $0x18, R9
  6635. IMULQ R8, R9
  6636. SHRQ $0x34, R9
  6637. MOVL CX, R8
  6638. SUBL 16(SP), R8
  6639. MOVL 1(DX)(R8*1), R10
  6640. MOVQ SI, R8
  6641. SHRQ $0x08, R8
  6642. CMPL R8, R10
  6643. JNE no_repeat_found_encodeBlockAsm12BAvx
  6644. LEAL 1(CX), SI
  6645. MOVL 12(SP), DI
  6646. MOVL SI, BP
  6647. SUBL 16(SP), BP
  6648. JZ repeat_extend_back_end_encodeBlockAsm12BAvx
  6649. repeat_extend_back_loop_encodeBlockAsm12BAvx:
  6650. CMPL SI, DI
  6651. JLE repeat_extend_back_end_encodeBlockAsm12BAvx
  6652. MOVB -1(DX)(BP*1), BL
  6653. MOVB -1(DX)(SI*1), R8
  6654. CMPB BL, R8
  6655. JNE repeat_extend_back_end_encodeBlockAsm12BAvx
  6656. LEAL -1(SI), SI
  6657. DECL BP
  6658. JNZ repeat_extend_back_loop_encodeBlockAsm12BAvx
  6659. repeat_extend_back_end_encodeBlockAsm12BAvx:
  6660. MOVL 12(SP), BP
  6661. CMPL BP, SI
  6662. JEQ emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
  6663. MOVL SI, R8
  6664. MOVL SI, 12(SP)
  6665. LEAQ (DX)(BP*1), R9
  6666. SUBL BP, R8
  6667. MOVL R8, BP
  6668. SUBL $0x01, BP
  6669. JC emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
  6670. CMPL BP, $0x3c
  6671. JLT one_byte_repeat_emit_encodeBlockAsm12BAvx
  6672. CMPL BP, $0x00000100
  6673. JLT two_bytes_repeat_emit_encodeBlockAsm12BAvx
  6674. CMPL BP, $0x00010000
  6675. JLT three_bytes_repeat_emit_encodeBlockAsm12BAvx
  6676. CMPL BP, $0x01000000
  6677. JLT four_bytes_repeat_emit_encodeBlockAsm12BAvx
  6678. MOVB $0xfc, (AX)
  6679. MOVL BP, 1(AX)
  6680. ADDQ $0x05, AX
  6681. JMP memmove_repeat_emit_encodeBlockAsm12BAvx
  6682. four_bytes_repeat_emit_encodeBlockAsm12BAvx:
  6683. MOVL BP, R10
  6684. SHRL $0x10, R10
  6685. MOVB $0xf8, (AX)
  6686. MOVW BP, 1(AX)
  6687. MOVB R10, 3(AX)
  6688. ADDQ $0x04, AX
  6689. JMP memmove_repeat_emit_encodeBlockAsm12BAvx
  6690. three_bytes_repeat_emit_encodeBlockAsm12BAvx:
  6691. MOVB $0xf4, (AX)
  6692. MOVW BP, 1(AX)
  6693. ADDQ $0x03, AX
  6694. JMP memmove_repeat_emit_encodeBlockAsm12BAvx
  6695. two_bytes_repeat_emit_encodeBlockAsm12BAvx:
  6696. MOVB $0xf0, (AX)
  6697. MOVB BP, 1(AX)
  6698. ADDQ $0x02, AX
  6699. JMP memmove_repeat_emit_encodeBlockAsm12BAvx
  6700. one_byte_repeat_emit_encodeBlockAsm12BAvx:
  6701. SHLB $0x02, BP
  6702. MOVB BP, (AX)
  6703. ADDQ $0x01, AX
  6704. memmove_repeat_emit_encodeBlockAsm12BAvx:
  6705. LEAQ (AX)(R8*1), BP
  6706. NOP
  6707. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail:
  6708. TESTQ R8, R8
  6709. JEQ memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6710. CMPQ R8, $0x02
  6711. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2
  6712. CMPQ R8, $0x04
  6713. JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3
  6714. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4
  6715. CMPQ R8, $0x08
  6716. JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7
  6717. JE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8
  6718. CMPQ R8, $0x10
  6719. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16
  6720. CMPQ R8, $0x20
  6721. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32
  6722. CMPQ R8, $0x40
  6723. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64
  6724. CMPQ R8, $0x80
  6725. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128
  6726. CMPQ R8, $0x00000100
  6727. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256
  6728. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned
  6729. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2:
  6730. MOVB (R9), R10
  6731. MOVB -1(R9)(R8*1), R11
  6732. MOVB R10, (AX)
  6733. MOVB R11, -1(AX)(R8*1)
  6734. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6735. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4:
  6736. MOVL (R9), R10
  6737. MOVL R10, (AX)
  6738. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6739. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3:
  6740. MOVW (R9), R10
  6741. MOVB 2(R9), R11
  6742. MOVW R10, (AX)
  6743. MOVB R11, 2(AX)
  6744. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6745. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7:
  6746. MOVL (R9), R10
  6747. MOVL -4(R9)(R8*1), R11
  6748. MOVL R10, (AX)
  6749. MOVL R11, -4(AX)(R8*1)
  6750. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6751. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8:
  6752. MOVQ (R9), R10
  6753. MOVQ R10, (AX)
  6754. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6755. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16:
  6756. MOVQ (R9), R10
  6757. MOVQ -8(R9)(R8*1), R11
  6758. MOVQ R10, (AX)
  6759. MOVQ R11, -8(AX)(R8*1)
  6760. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6761. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32:
  6762. MOVOU (R9), X0
  6763. MOVOU -16(R9)(R8*1), X1
  6764. MOVOU X0, (AX)
  6765. MOVOU X1, -16(AX)(R8*1)
  6766. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6767. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64:
  6768. MOVOU (R9), X0
  6769. MOVOU 16(R9), X1
  6770. MOVOU -32(R9)(R8*1), X2
  6771. MOVOU -16(R9)(R8*1), X3
  6772. MOVOU X0, (AX)
  6773. MOVOU X1, 16(AX)
  6774. MOVOU X2, -32(AX)(R8*1)
  6775. MOVOU X3, -16(AX)(R8*1)
  6776. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6777. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128:
  6778. MOVOU (R9), X0
  6779. MOVOU 16(R9), X1
  6780. MOVOU 32(R9), X2
  6781. MOVOU 48(R9), X3
  6782. MOVOU -64(R9)(R8*1), X12
  6783. MOVOU -48(R9)(R8*1), X13
  6784. MOVOU -32(R9)(R8*1), X14
  6785. MOVOU -16(R9)(R8*1), X15
  6786. MOVOU X0, (AX)
  6787. MOVOU X1, 16(AX)
  6788. MOVOU X2, 32(AX)
  6789. MOVOU X3, 48(AX)
  6790. MOVOU X12, -64(AX)(R8*1)
  6791. MOVOU X13, -48(AX)(R8*1)
  6792. MOVOU X14, -32(AX)(R8*1)
  6793. MOVOU X15, -16(AX)(R8*1)
  6794. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6795. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256:
  6796. MOVOU (R9), X0
  6797. MOVOU 16(R9), X1
  6798. MOVOU 32(R9), X2
  6799. MOVOU 48(R9), X3
  6800. MOVOU 64(R9), X4
  6801. MOVOU 80(R9), X5
  6802. MOVOU 96(R9), X6
  6803. MOVOU 112(R9), X7
  6804. MOVOU -128(R9)(R8*1), X8
  6805. MOVOU -112(R9)(R8*1), X9
  6806. MOVOU -96(R9)(R8*1), X10
  6807. MOVOU -80(R9)(R8*1), X11
  6808. MOVOU -64(R9)(R8*1), X12
  6809. MOVOU -48(R9)(R8*1), X13
  6810. MOVOU -32(R9)(R8*1), X14
  6811. MOVOU -16(R9)(R8*1), X15
  6812. MOVOU X0, (AX)
  6813. MOVOU X1, 16(AX)
  6814. MOVOU X2, 32(AX)
  6815. MOVOU X3, 48(AX)
  6816. MOVOU X4, 64(AX)
  6817. MOVOU X5, 80(AX)
  6818. MOVOU X6, 96(AX)
  6819. MOVOU X7, 112(AX)
  6820. MOVOU X8, -128(AX)(R8*1)
  6821. MOVOU X9, -112(AX)(R8*1)
  6822. MOVOU X10, -96(AX)(R8*1)
  6823. MOVOU X11, -80(AX)(R8*1)
  6824. MOVOU X12, -64(AX)(R8*1)
  6825. MOVOU X13, -48(AX)(R8*1)
  6826. MOVOU X14, -32(AX)(R8*1)
  6827. MOVOU X15, -16(AX)(R8*1)
  6828. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx
  6829. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048:
  6830. LEAQ -256(R8), R8
  6831. MOVOU (R9), X0
  6832. MOVOU 16(R9), X1
  6833. MOVOU 32(R9), X2
  6834. MOVOU 48(R9), X3
  6835. MOVOU 64(R9), X4
  6836. MOVOU 80(R9), X5
  6837. MOVOU 96(R9), X6
  6838. MOVOU 112(R9), X7
  6839. MOVOU 128(R9), X8
  6840. MOVOU 144(R9), X9
  6841. MOVOU 160(R9), X10
  6842. MOVOU 176(R9), X11
  6843. MOVOU 192(R9), X12
  6844. MOVOU 208(R9), X13
  6845. MOVOU 224(R9), X14
  6846. MOVOU 240(R9), X15
  6847. MOVOU X0, (AX)
  6848. MOVOU X1, 16(AX)
  6849. MOVOU X2, 32(AX)
  6850. MOVOU X3, 48(AX)
  6851. MOVOU X4, 64(AX)
  6852. MOVOU X5, 80(AX)
  6853. MOVOU X6, 96(AX)
  6854. MOVOU X7, 112(AX)
  6855. MOVOU X8, 128(AX)
  6856. MOVOU X9, 144(AX)
  6857. MOVOU X10, 160(AX)
  6858. MOVOU X11, 176(AX)
  6859. MOVOU X12, 192(AX)
  6860. MOVOU X13, 208(AX)
  6861. MOVOU X14, 224(AX)
  6862. MOVOU X15, 240(AX)
  6863. CMPQ R8, $0x00000100
  6864. LEAQ 256(R9), R9
  6865. LEAQ 256(AX), AX
  6866. JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048
  6867. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail
  6868. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned:
  6869. LEAQ (R9)(R8*1), R11
  6870. MOVQ AX, R13
  6871. MOVOU -128(R11), X5
  6872. MOVOU -112(R11), X6
  6873. MOVQ $0x00000080, R10
  6874. ANDQ $0xffffffe0, AX
  6875. ADDQ $0x20, AX
  6876. MOVOU -96(R11), X7
  6877. MOVOU -80(R11), X8
  6878. MOVQ AX, R12
  6879. SUBQ R13, R12
  6880. MOVOU -64(R11), X9
  6881. MOVOU -48(R11), X10
  6882. SUBQ R12, R8
  6883. MOVOU -32(R11), X11
  6884. MOVOU -16(R11), X12
  6885. VMOVDQU (R9), Y4
  6886. ADDQ R12, R9
  6887. SUBQ R10, R8
  6888. emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
  6889. VMOVDQU (R9), Y0
  6890. VMOVDQU 32(R9), Y1
  6891. VMOVDQU 64(R9), Y2
  6892. VMOVDQU 96(R9), Y3
  6893. ADDQ R10, R9
  6894. VMOVDQA Y0, (AX)
  6895. VMOVDQA Y1, 32(AX)
  6896. VMOVDQA Y2, 64(AX)
  6897. VMOVDQA Y3, 96(AX)
  6898. ADDQ R10, AX
  6899. SUBQ R10, R8
  6900. JA emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop
  6901. ADDQ R10, R8
  6902. ADDQ AX, R8
  6903. VMOVDQU Y4, (R13)
  6904. VZEROUPPER
  6905. MOVOU X5, -128(R8)
  6906. MOVOU X6, -112(R8)
  6907. MOVOU X7, -96(R8)
  6908. MOVOU X8, -80(R8)
  6909. MOVOU X9, -64(R8)
  6910. MOVOU X10, -48(R8)
  6911. MOVOU X11, -32(R8)
  6912. MOVOU X12, -16(R8)
  6913. memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx:
  6914. MOVQ BP, AX
  6915. emit_literal_done_repeat_emit_encodeBlockAsm12BAvx:
  6916. ADDL $0x05, CX
  6917. MOVL CX, BP
  6918. SUBL 16(SP), BP
  6919. MOVQ src_len+32(FP), R8
  6920. SUBL CX, R8
  6921. LEAQ (DX)(CX*1), R9
  6922. LEAQ (DX)(BP*1), BP
  6923. XORL R11, R11
  6924. CMPL R8, $0x08
  6925. JL matchlen_single_repeat_extend
  6926. matchlen_loopback_repeat_extend:
  6927. MOVQ (R9)(R11*1), R10
  6928. XORQ (BP)(R11*1), R10
  6929. TESTQ R10, R10
  6930. JZ matchlen_loop_repeat_extend
  6931. BSFQ R10, R10
  6932. SARQ $0x03, R10
  6933. LEAL (R11)(R10*1), R11
  6934. JMP repeat_extend_forward_end_encodeBlockAsm12BAvx
  6935. matchlen_loop_repeat_extend:
  6936. LEAL -8(R8), R8
  6937. LEAL 8(R11), R11
  6938. CMPL R8, $0x08
  6939. JGE matchlen_loopback_repeat_extend
  6940. matchlen_single_repeat_extend:
  6941. TESTL R8, R8
  6942. JZ repeat_extend_forward_end_encodeBlockAsm12BAvx
  6943. matchlen_single_loopback_repeat_extend:
  6944. MOVB (R9)(R11*1), R10
  6945. CMPB (BP)(R11*1), R10
  6946. JNE repeat_extend_forward_end_encodeBlockAsm12BAvx
  6947. LEAL 1(R11), R11
  6948. DECL R8
  6949. JNZ matchlen_single_loopback_repeat_extend
  6950. repeat_extend_forward_end_encodeBlockAsm12BAvx:
  6951. ADDL R11, CX
  6952. MOVL CX, BP
  6953. SUBL SI, BP
  6954. MOVL 16(SP), SI
  6955. TESTL DI, DI
  6956. JZ repeat_as_copy_encodeBlockAsm12BAvx
  6957. emit_repeat_again_match_repeat_encodeBlockAsm12BAvx:
  6958. MOVL BP, DI
  6959. LEAL -4(BP), BP
  6960. CMPL DI, $0x08
  6961. JLE repeat_two_match_repeat_encodeBlockAsm12BAvx
  6962. CMPL DI, $0x0c
  6963. JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12BAvx
  6964. CMPL SI, $0x00000800
  6965. JLT repeat_two_offset_match_repeat_encodeBlockAsm12BAvx
  6966. cant_repeat_two_offset_match_repeat_encodeBlockAsm12BAvx:
  6967. CMPL BP, $0x00000104
  6968. JLT repeat_three_match_repeat_encodeBlockAsm12BAvx
  6969. CMPL BP, $0x00010100
  6970. JLT repeat_four_match_repeat_encodeBlockAsm12BAvx
  6971. CMPL BP, $0x0100ffff
  6972. JLT repeat_five_match_repeat_encodeBlockAsm12BAvx
  6973. LEAL -16842747(BP), BP
  6974. MOVW $0x001d, (AX)
  6975. MOVW $0xfffb, 2(AX)
  6976. MOVB $0xff, 4(AX)
  6977. ADDQ $0x05, AX
  6978. JMP emit_repeat_again_match_repeat_encodeBlockAsm12BAvx
  6979. repeat_five_match_repeat_encodeBlockAsm12BAvx:
  6980. LEAL -65536(BP), BP
  6981. MOVL BP, SI
  6982. MOVW $0x001d, (AX)
  6983. MOVW BP, 2(AX)
  6984. SARL $0x10, SI
  6985. MOVB SI, 4(AX)
  6986. ADDQ $0x05, AX
  6987. JMP repeat_end_emit_encodeBlockAsm12BAvx
  6988. repeat_four_match_repeat_encodeBlockAsm12BAvx:
  6989. LEAL -256(BP), BP
  6990. MOVW $0x0019, (AX)
  6991. MOVW BP, 2(AX)
  6992. ADDQ $0x04, AX
  6993. JMP repeat_end_emit_encodeBlockAsm12BAvx
  6994. repeat_three_match_repeat_encodeBlockAsm12BAvx:
  6995. LEAL -4(BP), BP
  6996. MOVW $0x0015, (AX)
  6997. MOVB BP, 2(AX)
  6998. ADDQ $0x03, AX
  6999. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7000. repeat_two_match_repeat_encodeBlockAsm12BAvx:
  7001. SHLL $0x02, BP
  7002. ORL $0x01, BP
  7003. MOVW BP, (AX)
  7004. ADDQ $0x02, AX
  7005. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7006. repeat_two_offset_match_repeat_encodeBlockAsm12BAvx:
  7007. XORQ DI, DI
  7008. LEAL 1(DI)(BP*4), BP
  7009. MOVB SI, 1(AX)
  7010. SARL $0x08, SI
  7011. SHLL $0x05, SI
  7012. ORL SI, BP
  7013. MOVB BP, (AX)
  7014. ADDQ $0x02, AX
  7015. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7016. repeat_as_copy_encodeBlockAsm12BAvx:
  7017. CMPL SI, $0x00010000
  7018. JL two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx
  7019. four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12BAvx:
  7020. CMPL BP, $0x40
  7021. JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx
  7022. MOVB $0xff, (AX)
  7023. MOVL SI, 1(AX)
  7024. LEAL -64(BP), BP
  7025. ADDQ $0x05, AX
  7026. CMPL BP, $0x04
  7027. JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx
  7028. emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
  7029. MOVL BP, DI
  7030. LEAL -4(BP), BP
  7031. CMPL DI, $0x08
  7032. JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
  7033. CMPL DI, $0x0c
  7034. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
  7035. CMPL SI, $0x00000800
  7036. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
  7037. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
  7038. CMPL BP, $0x00000104
  7039. JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
  7040. CMPL BP, $0x00010100
  7041. JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
  7042. CMPL BP, $0x0100ffff
  7043. JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
  7044. LEAL -16842747(BP), BP
  7045. MOVW $0x001d, (AX)
  7046. MOVW $0xfffb, 2(AX)
  7047. MOVB $0xff, 4(AX)
  7048. ADDQ $0x05, AX
  7049. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
  7050. repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
  7051. LEAL -65536(BP), BP
  7052. MOVL BP, SI
  7053. MOVW $0x001d, (AX)
  7054. MOVW BP, 2(AX)
  7055. SARL $0x10, SI
  7056. MOVB SI, 4(AX)
  7057. ADDQ $0x05, AX
  7058. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7059. repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
  7060. LEAL -256(BP), BP
  7061. MOVW $0x0019, (AX)
  7062. MOVW BP, 2(AX)
  7063. ADDQ $0x04, AX
  7064. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7065. repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
  7066. LEAL -4(BP), BP
  7067. MOVW $0x0015, (AX)
  7068. MOVB BP, 2(AX)
  7069. ADDQ $0x03, AX
  7070. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7071. repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
  7072. SHLL $0x02, BP
  7073. ORL $0x01, BP
  7074. MOVW BP, (AX)
  7075. ADDQ $0x02, AX
  7076. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7077. repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
  7078. XORQ DI, DI
  7079. LEAL 1(DI)(BP*4), BP
  7080. MOVB SI, 1(AX)
  7081. SARL $0x08, SI
  7082. SHLL $0x05, SI
  7083. ORL SI, BP
  7084. MOVB BP, (AX)
  7085. ADDQ $0x02, AX
  7086. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7087. JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12BAvx
  7088. four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx:
  7089. TESTL BP, BP
  7090. JZ repeat_end_emit_encodeBlockAsm12BAvx
  7091. MOVB $0x03, BL
  7092. LEAL -4(BX)(BP*4), BP
  7093. MOVB BP, (AX)
  7094. MOVL SI, 1(AX)
  7095. ADDQ $0x05, AX
  7096. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7097. two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx:
  7098. CMPL BP, $0x40
  7099. JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx
  7100. MOVB $0xee, (AX)
  7101. MOVW SI, 1(AX)
  7102. LEAL -60(BP), BP
  7103. ADDQ $0x03, AX
  7104. emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
  7105. MOVL BP, DI
  7106. LEAL -4(BP), BP
  7107. CMPL DI, $0x08
  7108. JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
  7109. CMPL DI, $0x0c
  7110. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
  7111. CMPL SI, $0x00000800
  7112. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
  7113. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
  7114. CMPL BP, $0x00000104
  7115. JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
  7116. CMPL BP, $0x00010100
  7117. JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
  7118. CMPL BP, $0x0100ffff
  7119. JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
  7120. LEAL -16842747(BP), BP
  7121. MOVW $0x001d, (AX)
  7122. MOVW $0xfffb, 2(AX)
  7123. MOVB $0xff, 4(AX)
  7124. ADDQ $0x05, AX
  7125. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
  7126. repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
  7127. LEAL -65536(BP), BP
  7128. MOVL BP, SI
  7129. MOVW $0x001d, (AX)
  7130. MOVW BP, 2(AX)
  7131. SARL $0x10, SI
  7132. MOVB SI, 4(AX)
  7133. ADDQ $0x05, AX
  7134. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7135. repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
  7136. LEAL -256(BP), BP
  7137. MOVW $0x0019, (AX)
  7138. MOVW BP, 2(AX)
  7139. ADDQ $0x04, AX
  7140. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7141. repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
  7142. LEAL -4(BP), BP
  7143. MOVW $0x0015, (AX)
  7144. MOVB BP, 2(AX)
  7145. ADDQ $0x03, AX
  7146. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7147. repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
  7148. SHLL $0x02, BP
  7149. ORL $0x01, BP
  7150. MOVW BP, (AX)
  7151. ADDQ $0x02, AX
  7152. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7153. repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
  7154. XORQ DI, DI
  7155. LEAL 1(DI)(BP*4), BP
  7156. MOVB SI, 1(AX)
  7157. SARL $0x08, SI
  7158. SHLL $0x05, SI
  7159. ORL SI, BP
  7160. MOVB BP, (AX)
  7161. ADDQ $0x02, AX
  7162. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7163. JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx
  7164. two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx:
  7165. CMPL BP, $0x0c
  7166. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx
  7167. CMPL SI, $0x00000800
  7168. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx
  7169. MOVB $0x01, BL
  7170. LEAL -16(BX)(BP*4), BP
  7171. MOVB SI, 1(AX)
  7172. SHRL $0x08, SI
  7173. SHLL $0x05, SI
  7174. ORL SI, BP
  7175. MOVB BP, (AX)
  7176. ADDQ $0x02, AX
  7177. JMP repeat_end_emit_encodeBlockAsm12BAvx
  7178. emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx:
  7179. MOVB $0x02, BL
  7180. LEAL -4(BX)(BP*4), BP
  7181. MOVB BP, (AX)
  7182. MOVW SI, 1(AX)
  7183. ADDQ $0x03, AX
  7184. repeat_end_emit_encodeBlockAsm12BAvx:
  7185. MOVL CX, 12(SP)
  7186. CMPL CX, 8(SP)
  7187. JGE emit_remainder_encodeBlockAsm12BAvx
  7188. JMP search_loop_encodeBlockAsm12BAvx
  7189. no_repeat_found_encodeBlockAsm12BAvx:
  7190. CMPL (DX)(BP*1), SI
  7191. JEQ candidate_match_encodeBlockAsm12BAvx
  7192. SHRQ $0x08, SI
  7193. MOVL 24(SP)(R9*4), BP
  7194. LEAL 2(CX), R8
  7195. CMPL (DX)(DI*1), SI
  7196. JEQ candidate2_match_encodeBlockAsm12BAvx
  7197. MOVL R8, 24(SP)(R9*4)
  7198. SHRQ $0x08, SI
  7199. CMPL (DX)(BP*1), SI
  7200. JEQ candidate3_match_encodeBlockAsm12BAvx
  7201. MOVL 20(SP), CX
  7202. JMP search_loop_encodeBlockAsm12BAvx
  7203. candidate3_match_encodeBlockAsm12BAvx:
  7204. ADDL $0x02, CX
  7205. JMP candidate_match_encodeBlockAsm12BAvx
  7206. candidate2_match_encodeBlockAsm12BAvx:
  7207. MOVL R8, 24(SP)(R9*4)
  7208. INCL CX
  7209. MOVL DI, BP
  7210. candidate_match_encodeBlockAsm12BAvx:
  7211. MOVL 12(SP), SI
  7212. TESTL BP, BP
  7213. JZ match_extend_back_end_encodeBlockAsm12BAvx
  7214. match_extend_back_loop_encodeBlockAsm12BAvx:
  7215. CMPL CX, SI
  7216. JLE match_extend_back_end_encodeBlockAsm12BAvx
  7217. MOVB -1(DX)(BP*1), BL
  7218. MOVB -1(DX)(CX*1), DI
  7219. CMPB BL, DI
  7220. JNE match_extend_back_end_encodeBlockAsm12BAvx
  7221. LEAL -1(CX), CX
  7222. DECL BP
  7223. JZ match_extend_back_end_encodeBlockAsm12BAvx
  7224. JMP match_extend_back_loop_encodeBlockAsm12BAvx
  7225. match_extend_back_end_encodeBlockAsm12BAvx:
  7226. MOVL CX, SI
  7227. SUBL 12(SP), SI
  7228. LEAQ 4(AX)(SI*1), SI
  7229. CMPQ SI, (SP)
  7230. JL match_dst_size_check_encodeBlockAsm12BAvx
  7231. MOVQ $0x00000000, ret+48(FP)
  7232. RET
  7233. match_dst_size_check_encodeBlockAsm12BAvx:
  7234. MOVL CX, SI
  7235. MOVL 12(SP), DI
  7236. CMPL DI, SI
  7237. JEQ emit_literal_done_match_emit_encodeBlockAsm12BAvx
  7238. MOVL SI, R8
  7239. MOVL SI, 12(SP)
  7240. LEAQ (DX)(DI*1), SI
  7241. SUBL DI, R8
  7242. MOVL R8, DI
  7243. SUBL $0x01, DI
  7244. JC emit_literal_done_match_emit_encodeBlockAsm12BAvx
  7245. CMPL DI, $0x3c
  7246. JLT one_byte_match_emit_encodeBlockAsm12BAvx
  7247. CMPL DI, $0x00000100
  7248. JLT two_bytes_match_emit_encodeBlockAsm12BAvx
  7249. CMPL DI, $0x00010000
  7250. JLT three_bytes_match_emit_encodeBlockAsm12BAvx
  7251. CMPL DI, $0x01000000
  7252. JLT four_bytes_match_emit_encodeBlockAsm12BAvx
  7253. MOVB $0xfc, (AX)
  7254. MOVL DI, 1(AX)
  7255. ADDQ $0x05, AX
  7256. JMP memmove_match_emit_encodeBlockAsm12BAvx
  7257. four_bytes_match_emit_encodeBlockAsm12BAvx:
  7258. MOVL DI, R9
  7259. SHRL $0x10, R9
  7260. MOVB $0xf8, (AX)
  7261. MOVW DI, 1(AX)
  7262. MOVB R9, 3(AX)
  7263. ADDQ $0x04, AX
  7264. JMP memmove_match_emit_encodeBlockAsm12BAvx
  7265. three_bytes_match_emit_encodeBlockAsm12BAvx:
  7266. MOVB $0xf4, (AX)
  7267. MOVW DI, 1(AX)
  7268. ADDQ $0x03, AX
  7269. JMP memmove_match_emit_encodeBlockAsm12BAvx
  7270. two_bytes_match_emit_encodeBlockAsm12BAvx:
  7271. MOVB $0xf0, (AX)
  7272. MOVB DI, 1(AX)
  7273. ADDQ $0x02, AX
  7274. JMP memmove_match_emit_encodeBlockAsm12BAvx
  7275. one_byte_match_emit_encodeBlockAsm12BAvx:
  7276. SHLB $0x02, DI
  7277. MOVB DI, (AX)
  7278. ADDQ $0x01, AX
  7279. memmove_match_emit_encodeBlockAsm12BAvx:
  7280. LEAQ (AX)(R8*1), DI
  7281. NOP
  7282. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail:
  7283. TESTQ R8, R8
  7284. JEQ memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7285. CMPQ R8, $0x02
  7286. JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2
  7287. CMPQ R8, $0x04
  7288. JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3
  7289. JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4
  7290. CMPQ R8, $0x08
  7291. JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7
  7292. JE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8
  7293. CMPQ R8, $0x10
  7294. JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16
  7295. CMPQ R8, $0x20
  7296. JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32
  7297. CMPQ R8, $0x40
  7298. JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64
  7299. CMPQ R8, $0x80
  7300. JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128
  7301. CMPQ R8, $0x00000100
  7302. JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256
  7303. JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned
  7304. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2:
  7305. MOVB (SI), R9
  7306. MOVB -1(SI)(R8*1), R10
  7307. MOVB R9, (AX)
  7308. MOVB R10, -1(AX)(R8*1)
  7309. JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7310. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4:
  7311. MOVL (SI), R9
  7312. MOVL R9, (AX)
  7313. JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7314. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3:
  7315. MOVW (SI), R9
  7316. MOVB 2(SI), R10
  7317. MOVW R9, (AX)
  7318. MOVB R10, 2(AX)
  7319. JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7320. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7:
  7321. MOVL (SI), R9
  7322. MOVL -4(SI)(R8*1), R10
  7323. MOVL R9, (AX)
  7324. MOVL R10, -4(AX)(R8*1)
  7325. JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7326. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8:
  7327. MOVQ (SI), R9
  7328. MOVQ R9, (AX)
  7329. JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7330. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16:
  7331. MOVQ (SI), R9
  7332. MOVQ -8(SI)(R8*1), R10
  7333. MOVQ R9, (AX)
  7334. MOVQ R10, -8(AX)(R8*1)
  7335. JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7336. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32:
  7337. MOVOU (SI), X0
  7338. MOVOU -16(SI)(R8*1), X1
  7339. MOVOU X0, (AX)
  7340. MOVOU X1, -16(AX)(R8*1)
  7341. JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7342. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64:
  7343. MOVOU (SI), X0
  7344. MOVOU 16(SI), X1
  7345. MOVOU -32(SI)(R8*1), X2
  7346. MOVOU -16(SI)(R8*1), X3
  7347. MOVOU X0, (AX)
  7348. MOVOU X1, 16(AX)
  7349. MOVOU X2, -32(AX)(R8*1)
  7350. MOVOU X3, -16(AX)(R8*1)
  7351. JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7352. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128:
  7353. MOVOU (SI), X0
  7354. MOVOU 16(SI), X1
  7355. MOVOU 32(SI), X2
  7356. MOVOU 48(SI), X3
  7357. MOVOU -64(SI)(R8*1), X12
  7358. MOVOU -48(SI)(R8*1), X13
  7359. MOVOU -32(SI)(R8*1), X14
  7360. MOVOU -16(SI)(R8*1), X15
  7361. MOVOU X0, (AX)
  7362. MOVOU X1, 16(AX)
  7363. MOVOU X2, 32(AX)
  7364. MOVOU X3, 48(AX)
  7365. MOVOU X12, -64(AX)(R8*1)
  7366. MOVOU X13, -48(AX)(R8*1)
  7367. MOVOU X14, -32(AX)(R8*1)
  7368. MOVOU X15, -16(AX)(R8*1)
  7369. JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7370. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256:
  7371. MOVOU (SI), X0
  7372. MOVOU 16(SI), X1
  7373. MOVOU 32(SI), X2
  7374. MOVOU 48(SI), X3
  7375. MOVOU 64(SI), X4
  7376. MOVOU 80(SI), X5
  7377. MOVOU 96(SI), X6
  7378. MOVOU 112(SI), X7
  7379. MOVOU -128(SI)(R8*1), X8
  7380. MOVOU -112(SI)(R8*1), X9
  7381. MOVOU -96(SI)(R8*1), X10
  7382. MOVOU -80(SI)(R8*1), X11
  7383. MOVOU -64(SI)(R8*1), X12
  7384. MOVOU -48(SI)(R8*1), X13
  7385. MOVOU -32(SI)(R8*1), X14
  7386. MOVOU -16(SI)(R8*1), X15
  7387. MOVOU X0, (AX)
  7388. MOVOU X1, 16(AX)
  7389. MOVOU X2, 32(AX)
  7390. MOVOU X3, 48(AX)
  7391. MOVOU X4, 64(AX)
  7392. MOVOU X5, 80(AX)
  7393. MOVOU X6, 96(AX)
  7394. MOVOU X7, 112(AX)
  7395. MOVOU X8, -128(AX)(R8*1)
  7396. MOVOU X9, -112(AX)(R8*1)
  7397. MOVOU X10, -96(AX)(R8*1)
  7398. MOVOU X11, -80(AX)(R8*1)
  7399. MOVOU X12, -64(AX)(R8*1)
  7400. MOVOU X13, -48(AX)(R8*1)
  7401. MOVOU X14, -32(AX)(R8*1)
  7402. MOVOU X15, -16(AX)(R8*1)
  7403. JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx
  7404. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048:
  7405. LEAQ -256(R8), R8
  7406. MOVOU (SI), X0
  7407. MOVOU 16(SI), X1
  7408. MOVOU 32(SI), X2
  7409. MOVOU 48(SI), X3
  7410. MOVOU 64(SI), X4
  7411. MOVOU 80(SI), X5
  7412. MOVOU 96(SI), X6
  7413. MOVOU 112(SI), X7
  7414. MOVOU 128(SI), X8
  7415. MOVOU 144(SI), X9
  7416. MOVOU 160(SI), X10
  7417. MOVOU 176(SI), X11
  7418. MOVOU 192(SI), X12
  7419. MOVOU 208(SI), X13
  7420. MOVOU 224(SI), X14
  7421. MOVOU 240(SI), X15
  7422. MOVOU X0, (AX)
  7423. MOVOU X1, 16(AX)
  7424. MOVOU X2, 32(AX)
  7425. MOVOU X3, 48(AX)
  7426. MOVOU X4, 64(AX)
  7427. MOVOU X5, 80(AX)
  7428. MOVOU X6, 96(AX)
  7429. MOVOU X7, 112(AX)
  7430. MOVOU X8, 128(AX)
  7431. MOVOU X9, 144(AX)
  7432. MOVOU X10, 160(AX)
  7433. MOVOU X11, 176(AX)
  7434. MOVOU X12, 192(AX)
  7435. MOVOU X13, 208(AX)
  7436. MOVOU X14, 224(AX)
  7437. MOVOU X15, 240(AX)
  7438. CMPQ R8, $0x00000100
  7439. LEAQ 256(SI), SI
  7440. LEAQ 256(AX), AX
  7441. JGE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048
  7442. JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail
  7443. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned:
  7444. LEAQ (SI)(R8*1), R10
  7445. MOVQ AX, R12
  7446. MOVOU -128(R10), X5
  7447. MOVOU -112(R10), X6
  7448. MOVQ $0x00000080, R9
  7449. ANDQ $0xffffffe0, AX
  7450. ADDQ $0x20, AX
  7451. MOVOU -96(R10), X7
  7452. MOVOU -80(R10), X8
  7453. MOVQ AX, R11
  7454. SUBQ R12, R11
  7455. MOVOU -64(R10), X9
  7456. MOVOU -48(R10), X10
  7457. SUBQ R11, R8
  7458. MOVOU -32(R10), X11
  7459. MOVOU -16(R10), X12
  7460. VMOVDQU (SI), Y4
  7461. ADDQ R11, SI
  7462. SUBQ R9, R8
  7463. emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
  7464. VMOVDQU (SI), Y0
  7465. VMOVDQU 32(SI), Y1
  7466. VMOVDQU 64(SI), Y2
  7467. VMOVDQU 96(SI), Y3
  7468. ADDQ R9, SI
  7469. VMOVDQA Y0, (AX)
  7470. VMOVDQA Y1, 32(AX)
  7471. VMOVDQA Y2, 64(AX)
  7472. VMOVDQA Y3, 96(AX)
  7473. ADDQ R9, AX
  7474. SUBQ R9, R8
  7475. JA emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop
  7476. ADDQ R9, R8
  7477. ADDQ AX, R8
  7478. VMOVDQU Y4, (R12)
  7479. VZEROUPPER
  7480. MOVOU X5, -128(R8)
  7481. MOVOU X6, -112(R8)
  7482. MOVOU X7, -96(R8)
  7483. MOVOU X8, -80(R8)
  7484. MOVOU X9, -64(R8)
  7485. MOVOU X10, -48(R8)
  7486. MOVOU X11, -32(R8)
  7487. MOVOU X12, -16(R8)
  7488. memmove_end_copy_match_emit_encodeBlockAsm12BAvx:
  7489. MOVQ DI, AX
  7490. emit_literal_done_match_emit_encodeBlockAsm12BAvx:
  7491. match_nolit_loop_encodeBlockAsm12BAvx:
  7492. MOVL CX, SI
  7493. SUBL BP, SI
  7494. MOVL SI, 16(SP)
  7495. ADDL $0x04, CX
  7496. ADDL $0x04, BP
  7497. MOVQ src_len+32(FP), SI
  7498. SUBL CX, SI
  7499. LEAQ (DX)(CX*1), DI
  7500. LEAQ (DX)(BP*1), BP
  7501. XORL R9, R9
  7502. CMPL SI, $0x08
  7503. JL matchlen_single_match_nolit_encodeBlockAsm12BAvx
  7504. matchlen_loopback_match_nolit_encodeBlockAsm12BAvx:
  7505. MOVQ (DI)(R9*1), R8
  7506. XORQ (BP)(R9*1), R8
  7507. TESTQ R8, R8
  7508. JZ matchlen_loop_match_nolit_encodeBlockAsm12BAvx
  7509. BSFQ R8, R8
  7510. SARQ $0x03, R8
  7511. LEAL (R9)(R8*1), R9
  7512. JMP match_nolit_end_encodeBlockAsm12BAvx
  7513. matchlen_loop_match_nolit_encodeBlockAsm12BAvx:
  7514. LEAL -8(SI), SI
  7515. LEAL 8(R9), R9
  7516. CMPL SI, $0x08
  7517. JGE matchlen_loopback_match_nolit_encodeBlockAsm12BAvx
  7518. matchlen_single_match_nolit_encodeBlockAsm12BAvx:
  7519. TESTL SI, SI
  7520. JZ match_nolit_end_encodeBlockAsm12BAvx
  7521. matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx:
  7522. MOVB (DI)(R9*1), R8
  7523. CMPB (BP)(R9*1), R8
  7524. JNE match_nolit_end_encodeBlockAsm12BAvx
  7525. LEAL 1(R9), R9
  7526. DECL SI
  7527. JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx
  7528. match_nolit_end_encodeBlockAsm12BAvx:
  7529. ADDL R9, CX
  7530. MOVL 16(SP), BP
  7531. ADDL $0x04, R9
  7532. CMPL BP, $0x00010000
  7533. JL two_byte_offset_match_nolit_encodeBlockAsm12BAvx
  7534. four_bytes_loop_back_match_nolit_encodeBlockAsm12BAvx:
  7535. CMPL R9, $0x40
  7536. JLE four_bytes_remain_match_nolit_encodeBlockAsm12BAvx
  7537. MOVB $0xff, (AX)
  7538. MOVL BP, 1(AX)
  7539. LEAL -64(R9), R9
  7540. ADDQ $0x05, AX
  7541. CMPL R9, $0x04
  7542. JL four_bytes_remain_match_nolit_encodeBlockAsm12BAvx
  7543. emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy:
  7544. MOVL R9, SI
  7545. LEAL -4(R9), R9
  7546. CMPL SI, $0x08
  7547. JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy
  7548. CMPL SI, $0x0c
  7549. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy
  7550. CMPL BP, $0x00000800
  7551. JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy
  7552. cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy:
  7553. CMPL R9, $0x00000104
  7554. JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy
  7555. CMPL R9, $0x00010100
  7556. JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy
  7557. CMPL R9, $0x0100ffff
  7558. JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy
  7559. LEAL -16842747(R9), R9
  7560. MOVW $0x001d, (AX)
  7561. MOVW $0xfffb, 2(AX)
  7562. MOVB $0xff, 4(AX)
  7563. ADDQ $0x05, AX
  7564. JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy
  7565. repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy:
  7566. LEAL -65536(R9), R9
  7567. MOVL R9, BP
  7568. MOVW $0x001d, (AX)
  7569. MOVW R9, 2(AX)
  7570. SARL $0x10, BP
  7571. MOVB BP, 4(AX)
  7572. ADDQ $0x05, AX
  7573. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7574. repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy:
  7575. LEAL -256(R9), R9
  7576. MOVW $0x0019, (AX)
  7577. MOVW R9, 2(AX)
  7578. ADDQ $0x04, AX
  7579. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7580. repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy:
  7581. LEAL -4(R9), R9
  7582. MOVW $0x0015, (AX)
  7583. MOVB R9, 2(AX)
  7584. ADDQ $0x03, AX
  7585. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7586. repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy:
  7587. SHLL $0x02, R9
  7588. ORL $0x01, R9
  7589. MOVW R9, (AX)
  7590. ADDQ $0x02, AX
  7591. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7592. repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy:
  7593. XORQ SI, SI
  7594. LEAL 1(SI)(R9*4), R9
  7595. MOVB BP, 1(AX)
  7596. SARL $0x08, BP
  7597. SHLL $0x05, BP
  7598. ORL BP, R9
  7599. MOVB R9, (AX)
  7600. ADDQ $0x02, AX
  7601. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7602. JMP four_bytes_loop_back_match_nolit_encodeBlockAsm12BAvx
  7603. four_bytes_remain_match_nolit_encodeBlockAsm12BAvx:
  7604. TESTL R9, R9
  7605. JZ match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7606. MOVB $0x03, BL
  7607. LEAL -4(BX)(R9*4), R9
  7608. MOVB R9, (AX)
  7609. MOVL BP, 1(AX)
  7610. ADDQ $0x05, AX
  7611. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7612. two_byte_offset_match_nolit_encodeBlockAsm12BAvx:
  7613. CMPL R9, $0x40
  7614. JLE two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx
  7615. MOVB $0xee, (AX)
  7616. MOVW BP, 1(AX)
  7617. LEAL -60(R9), R9
  7618. ADDQ $0x03, AX
  7619. emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
  7620. MOVL R9, SI
  7621. LEAL -4(R9), R9
  7622. CMPL SI, $0x08
  7623. JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
  7624. CMPL SI, $0x0c
  7625. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
  7626. CMPL BP, $0x00000800
  7627. JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
  7628. cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
  7629. CMPL R9, $0x00000104
  7630. JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
  7631. CMPL R9, $0x00010100
  7632. JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
  7633. CMPL R9, $0x0100ffff
  7634. JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
  7635. LEAL -16842747(R9), R9
  7636. MOVW $0x001d, (AX)
  7637. MOVW $0xfffb, 2(AX)
  7638. MOVB $0xff, 4(AX)
  7639. ADDQ $0x05, AX
  7640. JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
  7641. repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
  7642. LEAL -65536(R9), R9
  7643. MOVL R9, BP
  7644. MOVW $0x001d, (AX)
  7645. MOVW R9, 2(AX)
  7646. SARL $0x10, BP
  7647. MOVB BP, 4(AX)
  7648. ADDQ $0x05, AX
  7649. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7650. repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
  7651. LEAL -256(R9), R9
  7652. MOVW $0x0019, (AX)
  7653. MOVW R9, 2(AX)
  7654. ADDQ $0x04, AX
  7655. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7656. repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
  7657. LEAL -4(R9), R9
  7658. MOVW $0x0015, (AX)
  7659. MOVB R9, 2(AX)
  7660. ADDQ $0x03, AX
  7661. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7662. repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
  7663. SHLL $0x02, R9
  7664. ORL $0x01, R9
  7665. MOVW R9, (AX)
  7666. ADDQ $0x02, AX
  7667. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7668. repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
  7669. XORQ SI, SI
  7670. LEAL 1(SI)(R9*4), R9
  7671. MOVB BP, 1(AX)
  7672. SARL $0x08, BP
  7673. SHLL $0x05, BP
  7674. ORL BP, R9
  7675. MOVB R9, (AX)
  7676. ADDQ $0x02, AX
  7677. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7678. JMP two_byte_offset_match_nolit_encodeBlockAsm12BAvx
  7679. two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx:
  7680. CMPL R9, $0x0c
  7681. JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx
  7682. CMPL BP, $0x00000800
  7683. JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx
  7684. MOVB $0x01, BL
  7685. LEAL -16(BX)(R9*4), R9
  7686. MOVB BP, 1(AX)
  7687. SHRL $0x08, BP
  7688. SHLL $0x05, BP
  7689. ORL BP, R9
  7690. MOVB R9, (AX)
  7691. ADDQ $0x02, AX
  7692. JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
  7693. emit_copy_three_match_nolit_encodeBlockAsm12BAvx:
  7694. MOVB $0x02, BL
  7695. LEAL -4(BX)(R9*4), R9
  7696. MOVB R9, (AX)
  7697. MOVW BP, 1(AX)
  7698. ADDQ $0x03, AX
  7699. match_nolit_emitcopy_end_encodeBlockAsm12BAvx:
  7700. MOVL CX, 12(SP)
  7701. CMPL CX, 8(SP)
  7702. JGE emit_remainder_encodeBlockAsm12BAvx
  7703. CMPQ AX, (SP)
  7704. JL match_nolit_dst_ok_encodeBlockAsm12BAvx
  7705. MOVQ $0x00000000, ret+48(FP)
  7706. RET
  7707. match_nolit_dst_ok_encodeBlockAsm12BAvx:
  7708. MOVQ -2(DX)(CX*1), SI
  7709. MOVQ $0x000000cf1bbcdcbb, BP
  7710. MOVQ SI, DI
  7711. SHRQ $0x10, SI
  7712. MOVQ SI, R8
  7713. SHLQ $0x18, DI
  7714. IMULQ BP, DI
  7715. SHRQ $0x34, DI
  7716. SHLQ $0x18, R8
  7717. IMULQ BP, R8
  7718. SHRQ $0x34, R8
  7719. LEAL -2(CX), R9
  7720. MOVL 24(SP)(R8*4), BP
  7721. MOVL R9, 24(SP)(DI*4)
  7722. MOVL CX, 24(SP)(R8*4)
  7723. CMPL (DX)(BP*1), SI
  7724. JEQ match_nolit_loop_encodeBlockAsm12BAvx
  7725. INCL CX
  7726. JMP search_loop_encodeBlockAsm12BAvx
  7727. emit_remainder_encodeBlockAsm12BAvx:
  7728. MOVQ src_len+32(FP), CX
  7729. SUBL 12(SP), CX
  7730. LEAQ 4(AX)(CX*1), CX
  7731. CMPQ CX, (SP)
  7732. JL emit_remainder_ok_encodeBlockAsm12BAvx
  7733. MOVQ $0x00000000, ret+48(FP)
  7734. RET
  7735. emit_remainder_ok_encodeBlockAsm12BAvx:
  7736. MOVQ src_len+32(FP), CX
  7737. MOVL 12(SP), BX
  7738. CMPL BX, CX
  7739. JEQ emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
  7740. MOVL CX, BP
  7741. MOVL CX, 12(SP)
  7742. LEAQ (DX)(BX*1), CX
  7743. SUBL BX, BP
  7744. MOVL BP, DX
  7745. SUBL $0x01, DX
  7746. JC emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
  7747. CMPL DX, $0x3c
  7748. JLT one_byte_emit_remainder_encodeBlockAsm12BAvx
  7749. CMPL DX, $0x00000100
  7750. JLT two_bytes_emit_remainder_encodeBlockAsm12BAvx
  7751. CMPL DX, $0x00010000
  7752. JLT three_bytes_emit_remainder_encodeBlockAsm12BAvx
  7753. CMPL DX, $0x01000000
  7754. JLT four_bytes_emit_remainder_encodeBlockAsm12BAvx
  7755. MOVB $0xfc, (AX)
  7756. MOVL DX, 1(AX)
  7757. ADDQ $0x05, AX
  7758. JMP memmove_emit_remainder_encodeBlockAsm12BAvx
  7759. four_bytes_emit_remainder_encodeBlockAsm12BAvx:
  7760. MOVL DX, BX
  7761. SHRL $0x10, BX
  7762. MOVB $0xf8, (AX)
  7763. MOVW DX, 1(AX)
  7764. MOVB BL, 3(AX)
  7765. ADDQ $0x04, AX
  7766. JMP memmove_emit_remainder_encodeBlockAsm12BAvx
  7767. three_bytes_emit_remainder_encodeBlockAsm12BAvx:
  7768. MOVB $0xf4, (AX)
  7769. MOVW DX, 1(AX)
  7770. ADDQ $0x03, AX
  7771. JMP memmove_emit_remainder_encodeBlockAsm12BAvx
  7772. two_bytes_emit_remainder_encodeBlockAsm12BAvx:
  7773. MOVB $0xf0, (AX)
  7774. MOVB DL, 1(AX)
  7775. ADDQ $0x02, AX
  7776. JMP memmove_emit_remainder_encodeBlockAsm12BAvx
  7777. one_byte_emit_remainder_encodeBlockAsm12BAvx:
  7778. SHLB $0x02, DL
  7779. MOVB DL, (AX)
  7780. ADDQ $0x01, AX
  7781. memmove_emit_remainder_encodeBlockAsm12BAvx:
  7782. LEAQ (AX)(BP*1), DX
  7783. MOVL BP, BX
  7784. NOP
  7785. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail:
  7786. TESTQ BX, BX
  7787. JEQ memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7788. CMPQ BX, $0x02
  7789. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2
  7790. CMPQ BX, $0x04
  7791. JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3
  7792. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4
  7793. CMPQ BX, $0x08
  7794. JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7
  7795. JE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8
  7796. CMPQ BX, $0x10
  7797. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16
  7798. CMPQ BX, $0x20
  7799. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32
  7800. CMPQ BX, $0x40
  7801. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64
  7802. CMPQ BX, $0x80
  7803. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128
  7804. CMPQ BX, $0x00000100
  7805. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256
  7806. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned
  7807. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2:
  7808. MOVB (CX), BP
  7809. MOVB -1(CX)(BX*1), SI
  7810. MOVB BP, (AX)
  7811. MOVB SI, -1(AX)(BX*1)
  7812. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7813. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4:
  7814. MOVL (CX), BP
  7815. MOVL BP, (AX)
  7816. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7817. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3:
  7818. MOVW (CX), BP
  7819. MOVB 2(CX), SI
  7820. MOVW BP, (AX)
  7821. MOVB SI, 2(AX)
  7822. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7823. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7:
  7824. MOVL (CX), BP
  7825. MOVL -4(CX)(BX*1), SI
  7826. MOVL BP, (AX)
  7827. MOVL SI, -4(AX)(BX*1)
  7828. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7829. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8:
  7830. MOVQ (CX), BP
  7831. MOVQ BP, (AX)
  7832. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7833. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16:
  7834. MOVQ (CX), BP
  7835. MOVQ -8(CX)(BX*1), SI
  7836. MOVQ BP, (AX)
  7837. MOVQ SI, -8(AX)(BX*1)
  7838. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7839. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32:
  7840. MOVOU (CX), X0
  7841. MOVOU -16(CX)(BX*1), X1
  7842. MOVOU X0, (AX)
  7843. MOVOU X1, -16(AX)(BX*1)
  7844. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7845. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64:
  7846. MOVOU (CX), X0
  7847. MOVOU 16(CX), X1
  7848. MOVOU -32(CX)(BX*1), X2
  7849. MOVOU -16(CX)(BX*1), X3
  7850. MOVOU X0, (AX)
  7851. MOVOU X1, 16(AX)
  7852. MOVOU X2, -32(AX)(BX*1)
  7853. MOVOU X3, -16(AX)(BX*1)
  7854. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7855. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128:
  7856. MOVOU (CX), X0
  7857. MOVOU 16(CX), X1
  7858. MOVOU 32(CX), X2
  7859. MOVOU 48(CX), X3
  7860. MOVOU -64(CX)(BX*1), X12
  7861. MOVOU -48(CX)(BX*1), X13
  7862. MOVOU -32(CX)(BX*1), X14
  7863. MOVOU -16(CX)(BX*1), X15
  7864. MOVOU X0, (AX)
  7865. MOVOU X1, 16(AX)
  7866. MOVOU X2, 32(AX)
  7867. MOVOU X3, 48(AX)
  7868. MOVOU X12, -64(AX)(BX*1)
  7869. MOVOU X13, -48(AX)(BX*1)
  7870. MOVOU X14, -32(AX)(BX*1)
  7871. MOVOU X15, -16(AX)(BX*1)
  7872. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7873. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256:
  7874. MOVOU (CX), X0
  7875. MOVOU 16(CX), X1
  7876. MOVOU 32(CX), X2
  7877. MOVOU 48(CX), X3
  7878. MOVOU 64(CX), X4
  7879. MOVOU 80(CX), X5
  7880. MOVOU 96(CX), X6
  7881. MOVOU 112(CX), X7
  7882. MOVOU -128(CX)(BX*1), X8
  7883. MOVOU -112(CX)(BX*1), X9
  7884. MOVOU -96(CX)(BX*1), X10
  7885. MOVOU -80(CX)(BX*1), X11
  7886. MOVOU -64(CX)(BX*1), X12
  7887. MOVOU -48(CX)(BX*1), X13
  7888. MOVOU -32(CX)(BX*1), X14
  7889. MOVOU -16(CX)(BX*1), X15
  7890. MOVOU X0, (AX)
  7891. MOVOU X1, 16(AX)
  7892. MOVOU X2, 32(AX)
  7893. MOVOU X3, 48(AX)
  7894. MOVOU X4, 64(AX)
  7895. MOVOU X5, 80(AX)
  7896. MOVOU X6, 96(AX)
  7897. MOVOU X7, 112(AX)
  7898. MOVOU X8, -128(AX)(BX*1)
  7899. MOVOU X9, -112(AX)(BX*1)
  7900. MOVOU X10, -96(AX)(BX*1)
  7901. MOVOU X11, -80(AX)(BX*1)
  7902. MOVOU X12, -64(AX)(BX*1)
  7903. MOVOU X13, -48(AX)(BX*1)
  7904. MOVOU X14, -32(AX)(BX*1)
  7905. MOVOU X15, -16(AX)(BX*1)
  7906. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx
  7907. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048:
  7908. LEAQ -256(BX), BX
  7909. MOVOU (CX), X0
  7910. MOVOU 16(CX), X1
  7911. MOVOU 32(CX), X2
  7912. MOVOU 48(CX), X3
  7913. MOVOU 64(CX), X4
  7914. MOVOU 80(CX), X5
  7915. MOVOU 96(CX), X6
  7916. MOVOU 112(CX), X7
  7917. MOVOU 128(CX), X8
  7918. MOVOU 144(CX), X9
  7919. MOVOU 160(CX), X10
  7920. MOVOU 176(CX), X11
  7921. MOVOU 192(CX), X12
  7922. MOVOU 208(CX), X13
  7923. MOVOU 224(CX), X14
  7924. MOVOU 240(CX), X15
  7925. MOVOU X0, (AX)
  7926. MOVOU X1, 16(AX)
  7927. MOVOU X2, 32(AX)
  7928. MOVOU X3, 48(AX)
  7929. MOVOU X4, 64(AX)
  7930. MOVOU X5, 80(AX)
  7931. MOVOU X6, 96(AX)
  7932. MOVOU X7, 112(AX)
  7933. MOVOU X8, 128(AX)
  7934. MOVOU X9, 144(AX)
  7935. MOVOU X10, 160(AX)
  7936. MOVOU X11, 176(AX)
  7937. MOVOU X12, 192(AX)
  7938. MOVOU X13, 208(AX)
  7939. MOVOU X14, 224(AX)
  7940. MOVOU X15, 240(AX)
  7941. CMPQ BX, $0x00000100
  7942. LEAQ 256(CX), CX
  7943. LEAQ 256(AX), AX
  7944. JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048
  7945. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail
  7946. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned:
  7947. LEAQ (CX)(BX*1), SI
  7948. MOVQ AX, R8
  7949. MOVOU -128(SI), X5
  7950. MOVOU -112(SI), X6
  7951. MOVQ $0x00000080, BP
  7952. ANDQ $0xffffffe0, AX
  7953. ADDQ $0x20, AX
  7954. MOVOU -96(SI), X7
  7955. MOVOU -80(SI), X8
  7956. MOVQ AX, DI
  7957. SUBQ R8, DI
  7958. MOVOU -64(SI), X9
  7959. MOVOU -48(SI), X10
  7960. SUBQ DI, BX
  7961. MOVOU -32(SI), X11
  7962. MOVOU -16(SI), X12
  7963. VMOVDQU (CX), Y4
  7964. ADDQ DI, CX
  7965. SUBQ BP, BX
  7966. emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
  7967. VMOVDQU (CX), Y0
  7968. VMOVDQU 32(CX), Y1
  7969. VMOVDQU 64(CX), Y2
  7970. VMOVDQU 96(CX), Y3
  7971. ADDQ BP, CX
  7972. VMOVDQA Y0, (AX)
  7973. VMOVDQA Y1, 32(AX)
  7974. VMOVDQA Y2, 64(AX)
  7975. VMOVDQA Y3, 96(AX)
  7976. ADDQ BP, AX
  7977. SUBQ BP, BX
  7978. JA emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop
  7979. ADDQ BP, BX
  7980. ADDQ AX, BX
  7981. VMOVDQU Y4, (R8)
  7982. VZEROUPPER
  7983. MOVOU X5, -128(BX)
  7984. MOVOU X6, -112(BX)
  7985. MOVOU X7, -96(BX)
  7986. MOVOU X8, -80(BX)
  7987. MOVOU X9, -64(BX)
  7988. MOVOU X10, -48(BX)
  7989. MOVOU X11, -32(BX)
  7990. MOVOU X12, -16(BX)
  7991. memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx:
  7992. MOVQ DX, AX
  7993. emit_literal_done_emit_remainder_encodeBlockAsm12BAvx:
  7994. MOVQ dst_base+0(FP), CX
  7995. SUBQ CX, AX
  7996. MOVQ AX, ret+48(FP)
  7997. RET
  7998. // func encodeBlockAsm10BAvx(dst []byte, src []byte) int
  7999. // Requires: AVX, SSE2
  8000. TEXT ·encodeBlockAsm10BAvx(SB), $4120-56
  8001. MOVQ dst_base+0(FP), AX
  8002. MOVQ $0x00000020, CX
  8003. LEAQ 24(SP), DX
  8004. PXOR X0, X0
  8005. zero_loop_encodeBlockAsm10BAvx:
  8006. MOVOU X0, (DX)
  8007. MOVOU X0, 16(DX)
  8008. MOVOU X0, 32(DX)
  8009. MOVOU X0, 48(DX)
  8010. MOVOU X0, 64(DX)
  8011. MOVOU X0, 80(DX)
  8012. MOVOU X0, 96(DX)
  8013. MOVOU X0, 112(DX)
  8014. ADDQ $0x80, DX
  8015. DECQ CX
  8016. JNZ zero_loop_encodeBlockAsm10BAvx
  8017. MOVL $0x00000000, 12(SP)
  8018. MOVQ src_len+32(FP), CX
  8019. LEAQ -5(CX), DX
  8020. LEAQ -8(CX), BP
  8021. MOVL BP, 8(SP)
  8022. SHRQ $0x05, CX
  8023. SUBL CX, DX
  8024. LEAQ (AX)(DX*1), DX
  8025. MOVQ DX, (SP)
  8026. MOVL $0x00000001, CX
  8027. MOVL CX, 16(SP)
  8028. MOVQ src_base+24(FP), DX
  8029. search_loop_encodeBlockAsm10BAvx:
  8030. MOVQ (DX)(CX*1), SI
  8031. MOVL CX, BP
  8032. SUBL 12(SP), BP
  8033. SHRL $0x05, BP
  8034. LEAL 4(CX)(BP*1), BP
  8035. MOVL 8(SP), DI
  8036. CMPL BP, DI
  8037. JGT emit_remainder_encodeBlockAsm10BAvx
  8038. MOVL BP, 20(SP)
  8039. MOVQ $0x9e3779b1, R8
  8040. MOVQ SI, R9
  8041. MOVQ SI, R10
  8042. SHRQ $0x08, R10
  8043. SHLQ $0x20, R9
  8044. IMULQ R8, R9
  8045. SHRQ $0x36, R9
  8046. SHLQ $0x20, R10
  8047. IMULQ R8, R10
  8048. SHRQ $0x36, R10
  8049. MOVL 24(SP)(R9*4), BP
  8050. MOVL 24(SP)(R10*4), DI
  8051. MOVL CX, 24(SP)(R9*4)
  8052. LEAL 1(CX), R9
  8053. MOVL R9, 24(SP)(R10*4)
  8054. MOVQ SI, R9
  8055. SHRQ $0x10, R9
  8056. SHLQ $0x20, R9
  8057. IMULQ R8, R9
  8058. SHRQ $0x36, R9
  8059. MOVL CX, R8
  8060. SUBL 16(SP), R8
  8061. MOVL 1(DX)(R8*1), R10
  8062. MOVQ SI, R8
  8063. SHRQ $0x08, R8
  8064. CMPL R8, R10
  8065. JNE no_repeat_found_encodeBlockAsm10BAvx
  8066. LEAL 1(CX), SI
  8067. MOVL 12(SP), DI
  8068. MOVL SI, BP
  8069. SUBL 16(SP), BP
  8070. JZ repeat_extend_back_end_encodeBlockAsm10BAvx
  8071. repeat_extend_back_loop_encodeBlockAsm10BAvx:
  8072. CMPL SI, DI
  8073. JLE repeat_extend_back_end_encodeBlockAsm10BAvx
  8074. MOVB -1(DX)(BP*1), BL
  8075. MOVB -1(DX)(SI*1), R8
  8076. CMPB BL, R8
  8077. JNE repeat_extend_back_end_encodeBlockAsm10BAvx
  8078. LEAL -1(SI), SI
  8079. DECL BP
  8080. JNZ repeat_extend_back_loop_encodeBlockAsm10BAvx
  8081. repeat_extend_back_end_encodeBlockAsm10BAvx:
  8082. MOVL 12(SP), BP
  8083. CMPL BP, SI
  8084. JEQ emit_literal_done_repeat_emit_encodeBlockAsm10BAvx
  8085. MOVL SI, R8
  8086. MOVL SI, 12(SP)
  8087. LEAQ (DX)(BP*1), R9
  8088. SUBL BP, R8
  8089. MOVL R8, BP
  8090. SUBL $0x01, BP
  8091. JC emit_literal_done_repeat_emit_encodeBlockAsm10BAvx
  8092. CMPL BP, $0x3c
  8093. JLT one_byte_repeat_emit_encodeBlockAsm10BAvx
  8094. CMPL BP, $0x00000100
  8095. JLT two_bytes_repeat_emit_encodeBlockAsm10BAvx
  8096. CMPL BP, $0x00010000
  8097. JLT three_bytes_repeat_emit_encodeBlockAsm10BAvx
  8098. CMPL BP, $0x01000000
  8099. JLT four_bytes_repeat_emit_encodeBlockAsm10BAvx
  8100. MOVB $0xfc, (AX)
  8101. MOVL BP, 1(AX)
  8102. ADDQ $0x05, AX
  8103. JMP memmove_repeat_emit_encodeBlockAsm10BAvx
  8104. four_bytes_repeat_emit_encodeBlockAsm10BAvx:
  8105. MOVL BP, R10
  8106. SHRL $0x10, R10
  8107. MOVB $0xf8, (AX)
  8108. MOVW BP, 1(AX)
  8109. MOVB R10, 3(AX)
  8110. ADDQ $0x04, AX
  8111. JMP memmove_repeat_emit_encodeBlockAsm10BAvx
  8112. three_bytes_repeat_emit_encodeBlockAsm10BAvx:
  8113. MOVB $0xf4, (AX)
  8114. MOVW BP, 1(AX)
  8115. ADDQ $0x03, AX
  8116. JMP memmove_repeat_emit_encodeBlockAsm10BAvx
  8117. two_bytes_repeat_emit_encodeBlockAsm10BAvx:
  8118. MOVB $0xf0, (AX)
  8119. MOVB BP, 1(AX)
  8120. ADDQ $0x02, AX
  8121. JMP memmove_repeat_emit_encodeBlockAsm10BAvx
  8122. one_byte_repeat_emit_encodeBlockAsm10BAvx:
  8123. SHLB $0x02, BP
  8124. MOVB BP, (AX)
  8125. ADDQ $0x01, AX
  8126. memmove_repeat_emit_encodeBlockAsm10BAvx:
  8127. LEAQ (AX)(R8*1), BP
  8128. NOP
  8129. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_tail:
  8130. TESTQ R8, R8
  8131. JEQ memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8132. CMPQ R8, $0x02
  8133. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_1or2
  8134. CMPQ R8, $0x04
  8135. JB emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_3
  8136. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_4
  8137. CMPQ R8, $0x08
  8138. JB emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_5through7
  8139. JE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_8
  8140. CMPQ R8, $0x10
  8141. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_9through16
  8142. CMPQ R8, $0x20
  8143. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_17through32
  8144. CMPQ R8, $0x40
  8145. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_33through64
  8146. CMPQ R8, $0x80
  8147. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_65through128
  8148. CMPQ R8, $0x00000100
  8149. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_129through256
  8150. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_avxUnaligned
  8151. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_1or2:
  8152. MOVB (R9), R10
  8153. MOVB -1(R9)(R8*1), R11
  8154. MOVB R10, (AX)
  8155. MOVB R11, -1(AX)(R8*1)
  8156. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8157. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_4:
  8158. MOVL (R9), R10
  8159. MOVL R10, (AX)
  8160. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8161. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_3:
  8162. MOVW (R9), R10
  8163. MOVB 2(R9), R11
  8164. MOVW R10, (AX)
  8165. MOVB R11, 2(AX)
  8166. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8167. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_5through7:
  8168. MOVL (R9), R10
  8169. MOVL -4(R9)(R8*1), R11
  8170. MOVL R10, (AX)
  8171. MOVL R11, -4(AX)(R8*1)
  8172. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8173. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_8:
  8174. MOVQ (R9), R10
  8175. MOVQ R10, (AX)
  8176. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8177. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_9through16:
  8178. MOVQ (R9), R10
  8179. MOVQ -8(R9)(R8*1), R11
  8180. MOVQ R10, (AX)
  8181. MOVQ R11, -8(AX)(R8*1)
  8182. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8183. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_17through32:
  8184. MOVOU (R9), X0
  8185. MOVOU -16(R9)(R8*1), X1
  8186. MOVOU X0, (AX)
  8187. MOVOU X1, -16(AX)(R8*1)
  8188. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8189. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_33through64:
  8190. MOVOU (R9), X0
  8191. MOVOU 16(R9), X1
  8192. MOVOU -32(R9)(R8*1), X2
  8193. MOVOU -16(R9)(R8*1), X3
  8194. MOVOU X0, (AX)
  8195. MOVOU X1, 16(AX)
  8196. MOVOU X2, -32(AX)(R8*1)
  8197. MOVOU X3, -16(AX)(R8*1)
  8198. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8199. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_65through128:
  8200. MOVOU (R9), X0
  8201. MOVOU 16(R9), X1
  8202. MOVOU 32(R9), X2
  8203. MOVOU 48(R9), X3
  8204. MOVOU -64(R9)(R8*1), X12
  8205. MOVOU -48(R9)(R8*1), X13
  8206. MOVOU -32(R9)(R8*1), X14
  8207. MOVOU -16(R9)(R8*1), X15
  8208. MOVOU X0, (AX)
  8209. MOVOU X1, 16(AX)
  8210. MOVOU X2, 32(AX)
  8211. MOVOU X3, 48(AX)
  8212. MOVOU X12, -64(AX)(R8*1)
  8213. MOVOU X13, -48(AX)(R8*1)
  8214. MOVOU X14, -32(AX)(R8*1)
  8215. MOVOU X15, -16(AX)(R8*1)
  8216. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8217. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_129through256:
  8218. MOVOU (R9), X0
  8219. MOVOU 16(R9), X1
  8220. MOVOU 32(R9), X2
  8221. MOVOU 48(R9), X3
  8222. MOVOU 64(R9), X4
  8223. MOVOU 80(R9), X5
  8224. MOVOU 96(R9), X6
  8225. MOVOU 112(R9), X7
  8226. MOVOU -128(R9)(R8*1), X8
  8227. MOVOU -112(R9)(R8*1), X9
  8228. MOVOU -96(R9)(R8*1), X10
  8229. MOVOU -80(R9)(R8*1), X11
  8230. MOVOU -64(R9)(R8*1), X12
  8231. MOVOU -48(R9)(R8*1), X13
  8232. MOVOU -32(R9)(R8*1), X14
  8233. MOVOU -16(R9)(R8*1), X15
  8234. MOVOU X0, (AX)
  8235. MOVOU X1, 16(AX)
  8236. MOVOU X2, 32(AX)
  8237. MOVOU X3, 48(AX)
  8238. MOVOU X4, 64(AX)
  8239. MOVOU X5, 80(AX)
  8240. MOVOU X6, 96(AX)
  8241. MOVOU X7, 112(AX)
  8242. MOVOU X8, -128(AX)(R8*1)
  8243. MOVOU X9, -112(AX)(R8*1)
  8244. MOVOU X10, -96(AX)(R8*1)
  8245. MOVOU X11, -80(AX)(R8*1)
  8246. MOVOU X12, -64(AX)(R8*1)
  8247. MOVOU X13, -48(AX)(R8*1)
  8248. MOVOU X14, -32(AX)(R8*1)
  8249. MOVOU X15, -16(AX)(R8*1)
  8250. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx
  8251. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_256through2048:
  8252. LEAQ -256(R8), R8
  8253. MOVOU (R9), X0
  8254. MOVOU 16(R9), X1
  8255. MOVOU 32(R9), X2
  8256. MOVOU 48(R9), X3
  8257. MOVOU 64(R9), X4
  8258. MOVOU 80(R9), X5
  8259. MOVOU 96(R9), X6
  8260. MOVOU 112(R9), X7
  8261. MOVOU 128(R9), X8
  8262. MOVOU 144(R9), X9
  8263. MOVOU 160(R9), X10
  8264. MOVOU 176(R9), X11
  8265. MOVOU 192(R9), X12
  8266. MOVOU 208(R9), X13
  8267. MOVOU 224(R9), X14
  8268. MOVOU 240(R9), X15
  8269. MOVOU X0, (AX)
  8270. MOVOU X1, 16(AX)
  8271. MOVOU X2, 32(AX)
  8272. MOVOU X3, 48(AX)
  8273. MOVOU X4, 64(AX)
  8274. MOVOU X5, 80(AX)
  8275. MOVOU X6, 96(AX)
  8276. MOVOU X7, 112(AX)
  8277. MOVOU X8, 128(AX)
  8278. MOVOU X9, 144(AX)
  8279. MOVOU X10, 160(AX)
  8280. MOVOU X11, 176(AX)
  8281. MOVOU X12, 192(AX)
  8282. MOVOU X13, 208(AX)
  8283. MOVOU X14, 224(AX)
  8284. MOVOU X15, 240(AX)
  8285. CMPQ R8, $0x00000100
  8286. LEAQ 256(R9), R9
  8287. LEAQ 256(AX), AX
  8288. JGE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_256through2048
  8289. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_tail
  8290. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_avxUnaligned:
  8291. LEAQ (R9)(R8*1), R11
  8292. MOVQ AX, R13
  8293. MOVOU -128(R11), X5
  8294. MOVOU -112(R11), X6
  8295. MOVQ $0x00000080, R10
  8296. ANDQ $0xffffffe0, AX
  8297. ADDQ $0x20, AX
  8298. MOVOU -96(R11), X7
  8299. MOVOU -80(R11), X8
  8300. MOVQ AX, R12
  8301. SUBQ R13, R12
  8302. MOVOU -64(R11), X9
  8303. MOVOU -48(R11), X10
  8304. SUBQ R12, R8
  8305. MOVOU -32(R11), X11
  8306. MOVOU -16(R11), X12
  8307. VMOVDQU (R9), Y4
  8308. ADDQ R12, R9
  8309. SUBQ R10, R8
  8310. emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_gobble_128_loop:
  8311. VMOVDQU (R9), Y0
  8312. VMOVDQU 32(R9), Y1
  8313. VMOVDQU 64(R9), Y2
  8314. VMOVDQU 96(R9), Y3
  8315. ADDQ R10, R9
  8316. VMOVDQA Y0, (AX)
  8317. VMOVDQA Y1, 32(AX)
  8318. VMOVDQA Y2, 64(AX)
  8319. VMOVDQA Y3, 96(AX)
  8320. ADDQ R10, AX
  8321. SUBQ R10, R8
  8322. JA emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_gobble_128_loop
  8323. ADDQ R10, R8
  8324. ADDQ AX, R8
  8325. VMOVDQU Y4, (R13)
  8326. VZEROUPPER
  8327. MOVOU X5, -128(R8)
  8328. MOVOU X6, -112(R8)
  8329. MOVOU X7, -96(R8)
  8330. MOVOU X8, -80(R8)
  8331. MOVOU X9, -64(R8)
  8332. MOVOU X10, -48(R8)
  8333. MOVOU X11, -32(R8)
  8334. MOVOU X12, -16(R8)
  8335. memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx:
  8336. MOVQ BP, AX
  8337. emit_literal_done_repeat_emit_encodeBlockAsm10BAvx:
  8338. ADDL $0x05, CX
  8339. MOVL CX, BP
  8340. SUBL 16(SP), BP
  8341. MOVQ src_len+32(FP), R8
  8342. SUBL CX, R8
  8343. LEAQ (DX)(CX*1), R9
  8344. LEAQ (DX)(BP*1), BP
  8345. XORL R11, R11
  8346. CMPL R8, $0x08
  8347. JL matchlen_single_repeat_extend
  8348. matchlen_loopback_repeat_extend:
  8349. MOVQ (R9)(R11*1), R10
  8350. XORQ (BP)(R11*1), R10
  8351. TESTQ R10, R10
  8352. JZ matchlen_loop_repeat_extend
  8353. BSFQ R10, R10
  8354. SARQ $0x03, R10
  8355. LEAL (R11)(R10*1), R11
  8356. JMP repeat_extend_forward_end_encodeBlockAsm10BAvx
  8357. matchlen_loop_repeat_extend:
  8358. LEAL -8(R8), R8
  8359. LEAL 8(R11), R11
  8360. CMPL R8, $0x08
  8361. JGE matchlen_loopback_repeat_extend
  8362. matchlen_single_repeat_extend:
  8363. TESTL R8, R8
  8364. JZ repeat_extend_forward_end_encodeBlockAsm10BAvx
  8365. matchlen_single_loopback_repeat_extend:
  8366. MOVB (R9)(R11*1), R10
  8367. CMPB (BP)(R11*1), R10
  8368. JNE repeat_extend_forward_end_encodeBlockAsm10BAvx
  8369. LEAL 1(R11), R11
  8370. DECL R8
  8371. JNZ matchlen_single_loopback_repeat_extend
  8372. repeat_extend_forward_end_encodeBlockAsm10BAvx:
  8373. ADDL R11, CX
  8374. MOVL CX, BP
  8375. SUBL SI, BP
  8376. MOVL 16(SP), SI
  8377. TESTL DI, DI
  8378. JZ repeat_as_copy_encodeBlockAsm10BAvx
  8379. emit_repeat_again_match_repeat_encodeBlockAsm10BAvx:
  8380. MOVL BP, DI
  8381. LEAL -4(BP), BP
  8382. CMPL DI, $0x08
  8383. JLE repeat_two_match_repeat_encodeBlockAsm10BAvx
  8384. CMPL DI, $0x0c
  8385. JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10BAvx
  8386. CMPL SI, $0x00000800
  8387. JLT repeat_two_offset_match_repeat_encodeBlockAsm10BAvx
  8388. cant_repeat_two_offset_match_repeat_encodeBlockAsm10BAvx:
  8389. CMPL BP, $0x00000104
  8390. JLT repeat_three_match_repeat_encodeBlockAsm10BAvx
  8391. CMPL BP, $0x00010100
  8392. JLT repeat_four_match_repeat_encodeBlockAsm10BAvx
  8393. CMPL BP, $0x0100ffff
  8394. JLT repeat_five_match_repeat_encodeBlockAsm10BAvx
  8395. LEAL -16842747(BP), BP
  8396. MOVW $0x001d, (AX)
  8397. MOVW $0xfffb, 2(AX)
  8398. MOVB $0xff, 4(AX)
  8399. ADDQ $0x05, AX
  8400. JMP emit_repeat_again_match_repeat_encodeBlockAsm10BAvx
  8401. repeat_five_match_repeat_encodeBlockAsm10BAvx:
  8402. LEAL -65536(BP), BP
  8403. MOVL BP, SI
  8404. MOVW $0x001d, (AX)
  8405. MOVW BP, 2(AX)
  8406. SARL $0x10, SI
  8407. MOVB SI, 4(AX)
  8408. ADDQ $0x05, AX
  8409. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8410. repeat_four_match_repeat_encodeBlockAsm10BAvx:
  8411. LEAL -256(BP), BP
  8412. MOVW $0x0019, (AX)
  8413. MOVW BP, 2(AX)
  8414. ADDQ $0x04, AX
  8415. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8416. repeat_three_match_repeat_encodeBlockAsm10BAvx:
  8417. LEAL -4(BP), BP
  8418. MOVW $0x0015, (AX)
  8419. MOVB BP, 2(AX)
  8420. ADDQ $0x03, AX
  8421. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8422. repeat_two_match_repeat_encodeBlockAsm10BAvx:
  8423. SHLL $0x02, BP
  8424. ORL $0x01, BP
  8425. MOVW BP, (AX)
  8426. ADDQ $0x02, AX
  8427. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8428. repeat_two_offset_match_repeat_encodeBlockAsm10BAvx:
  8429. XORQ DI, DI
  8430. LEAL 1(DI)(BP*4), BP
  8431. MOVB SI, 1(AX)
  8432. SARL $0x08, SI
  8433. SHLL $0x05, SI
  8434. ORL SI, BP
  8435. MOVB BP, (AX)
  8436. ADDQ $0x02, AX
  8437. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8438. repeat_as_copy_encodeBlockAsm10BAvx:
  8439. CMPL SI, $0x00010000
  8440. JL two_byte_offset_repeat_as_copy_encodeBlockAsm10BAvx
  8441. four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10BAvx:
  8442. CMPL BP, $0x40
  8443. JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm10BAvx
  8444. MOVB $0xff, (AX)
  8445. MOVL SI, 1(AX)
  8446. LEAL -64(BP), BP
  8447. ADDQ $0x05, AX
  8448. CMPL BP, $0x04
  8449. JL four_bytes_remain_repeat_as_copy_encodeBlockAsm10BAvx
  8450. emit_repeat_again_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy:
  8451. MOVL BP, DI
  8452. LEAL -4(BP), BP
  8453. CMPL DI, $0x08
  8454. JLE repeat_two_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy
  8455. CMPL DI, $0x0c
  8456. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy
  8457. CMPL SI, $0x00000800
  8458. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy
  8459. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy:
  8460. CMPL BP, $0x00000104
  8461. JLT repeat_three_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy
  8462. CMPL BP, $0x00010100
  8463. JLT repeat_four_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy
  8464. CMPL BP, $0x0100ffff
  8465. JLT repeat_five_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy
  8466. LEAL -16842747(BP), BP
  8467. MOVW $0x001d, (AX)
  8468. MOVW $0xfffb, 2(AX)
  8469. MOVB $0xff, 4(AX)
  8470. ADDQ $0x05, AX
  8471. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy
  8472. repeat_five_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy:
  8473. LEAL -65536(BP), BP
  8474. MOVL BP, SI
  8475. MOVW $0x001d, (AX)
  8476. MOVW BP, 2(AX)
  8477. SARL $0x10, SI
  8478. MOVB SI, 4(AX)
  8479. ADDQ $0x05, AX
  8480. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8481. repeat_four_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy:
  8482. LEAL -256(BP), BP
  8483. MOVW $0x0019, (AX)
  8484. MOVW BP, 2(AX)
  8485. ADDQ $0x04, AX
  8486. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8487. repeat_three_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy:
  8488. LEAL -4(BP), BP
  8489. MOVW $0x0015, (AX)
  8490. MOVB BP, 2(AX)
  8491. ADDQ $0x03, AX
  8492. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8493. repeat_two_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy:
  8494. SHLL $0x02, BP
  8495. ORL $0x01, BP
  8496. MOVW BP, (AX)
  8497. ADDQ $0x02, AX
  8498. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8499. repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy:
  8500. XORQ DI, DI
  8501. LEAL 1(DI)(BP*4), BP
  8502. MOVB SI, 1(AX)
  8503. SARL $0x08, SI
  8504. SHLL $0x05, SI
  8505. ORL SI, BP
  8506. MOVB BP, (AX)
  8507. ADDQ $0x02, AX
  8508. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8509. JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10BAvx
  8510. four_bytes_remain_repeat_as_copy_encodeBlockAsm10BAvx:
  8511. TESTL BP, BP
  8512. JZ repeat_end_emit_encodeBlockAsm10BAvx
  8513. MOVB $0x03, BL
  8514. LEAL -4(BX)(BP*4), BP
  8515. MOVB BP, (AX)
  8516. MOVL SI, 1(AX)
  8517. ADDQ $0x05, AX
  8518. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8519. two_byte_offset_repeat_as_copy_encodeBlockAsm10BAvx:
  8520. CMPL BP, $0x40
  8521. JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10BAvx
  8522. MOVB $0xee, (AX)
  8523. MOVW SI, 1(AX)
  8524. LEAL -60(BP), BP
  8525. ADDQ $0x03, AX
  8526. emit_repeat_again_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short:
  8527. MOVL BP, DI
  8528. LEAL -4(BP), BP
  8529. CMPL DI, $0x08
  8530. JLE repeat_two_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short
  8531. CMPL DI, $0x0c
  8532. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short
  8533. CMPL SI, $0x00000800
  8534. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short
  8535. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short:
  8536. CMPL BP, $0x00000104
  8537. JLT repeat_three_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short
  8538. CMPL BP, $0x00010100
  8539. JLT repeat_four_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short
  8540. CMPL BP, $0x0100ffff
  8541. JLT repeat_five_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short
  8542. LEAL -16842747(BP), BP
  8543. MOVW $0x001d, (AX)
  8544. MOVW $0xfffb, 2(AX)
  8545. MOVB $0xff, 4(AX)
  8546. ADDQ $0x05, AX
  8547. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short
  8548. repeat_five_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short:
  8549. LEAL -65536(BP), BP
  8550. MOVL BP, SI
  8551. MOVW $0x001d, (AX)
  8552. MOVW BP, 2(AX)
  8553. SARL $0x10, SI
  8554. MOVB SI, 4(AX)
  8555. ADDQ $0x05, AX
  8556. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8557. repeat_four_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short:
  8558. LEAL -256(BP), BP
  8559. MOVW $0x0019, (AX)
  8560. MOVW BP, 2(AX)
  8561. ADDQ $0x04, AX
  8562. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8563. repeat_three_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short:
  8564. LEAL -4(BP), BP
  8565. MOVW $0x0015, (AX)
  8566. MOVB BP, 2(AX)
  8567. ADDQ $0x03, AX
  8568. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8569. repeat_two_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short:
  8570. SHLL $0x02, BP
  8571. ORL $0x01, BP
  8572. MOVW BP, (AX)
  8573. ADDQ $0x02, AX
  8574. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8575. repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short:
  8576. XORQ DI, DI
  8577. LEAL 1(DI)(BP*4), BP
  8578. MOVB SI, 1(AX)
  8579. SARL $0x08, SI
  8580. SHLL $0x05, SI
  8581. ORL SI, BP
  8582. MOVB BP, (AX)
  8583. ADDQ $0x02, AX
  8584. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8585. JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10BAvx
  8586. two_byte_offset_short_repeat_as_copy_encodeBlockAsm10BAvx:
  8587. CMPL BP, $0x0c
  8588. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10BAvx
  8589. CMPL SI, $0x00000800
  8590. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10BAvx
  8591. MOVB $0x01, BL
  8592. LEAL -16(BX)(BP*4), BP
  8593. MOVB SI, 1(AX)
  8594. SHRL $0x08, SI
  8595. SHLL $0x05, SI
  8596. ORL SI, BP
  8597. MOVB BP, (AX)
  8598. ADDQ $0x02, AX
  8599. JMP repeat_end_emit_encodeBlockAsm10BAvx
  8600. emit_copy_three_repeat_as_copy_encodeBlockAsm10BAvx:
  8601. MOVB $0x02, BL
  8602. LEAL -4(BX)(BP*4), BP
  8603. MOVB BP, (AX)
  8604. MOVW SI, 1(AX)
  8605. ADDQ $0x03, AX
  8606. repeat_end_emit_encodeBlockAsm10BAvx:
  8607. MOVL CX, 12(SP)
  8608. CMPL CX, 8(SP)
  8609. JGE emit_remainder_encodeBlockAsm10BAvx
  8610. JMP search_loop_encodeBlockAsm10BAvx
  8611. no_repeat_found_encodeBlockAsm10BAvx:
  8612. CMPL (DX)(BP*1), SI
  8613. JEQ candidate_match_encodeBlockAsm10BAvx
  8614. SHRQ $0x08, SI
  8615. MOVL 24(SP)(R9*4), BP
  8616. LEAL 2(CX), R8
  8617. CMPL (DX)(DI*1), SI
  8618. JEQ candidate2_match_encodeBlockAsm10BAvx
  8619. MOVL R8, 24(SP)(R9*4)
  8620. SHRQ $0x08, SI
  8621. CMPL (DX)(BP*1), SI
  8622. JEQ candidate3_match_encodeBlockAsm10BAvx
  8623. MOVL 20(SP), CX
  8624. JMP search_loop_encodeBlockAsm10BAvx
  8625. candidate3_match_encodeBlockAsm10BAvx:
  8626. ADDL $0x02, CX
  8627. JMP candidate_match_encodeBlockAsm10BAvx
  8628. candidate2_match_encodeBlockAsm10BAvx:
  8629. MOVL R8, 24(SP)(R9*4)
  8630. INCL CX
  8631. MOVL DI, BP
  8632. candidate_match_encodeBlockAsm10BAvx:
  8633. MOVL 12(SP), SI
  8634. TESTL BP, BP
  8635. JZ match_extend_back_end_encodeBlockAsm10BAvx
  8636. match_extend_back_loop_encodeBlockAsm10BAvx:
  8637. CMPL CX, SI
  8638. JLE match_extend_back_end_encodeBlockAsm10BAvx
  8639. MOVB -1(DX)(BP*1), BL
  8640. MOVB -1(DX)(CX*1), DI
  8641. CMPB BL, DI
  8642. JNE match_extend_back_end_encodeBlockAsm10BAvx
  8643. LEAL -1(CX), CX
  8644. DECL BP
  8645. JZ match_extend_back_end_encodeBlockAsm10BAvx
  8646. JMP match_extend_back_loop_encodeBlockAsm10BAvx
  8647. match_extend_back_end_encodeBlockAsm10BAvx:
  8648. MOVL CX, SI
  8649. SUBL 12(SP), SI
  8650. LEAQ 4(AX)(SI*1), SI
  8651. CMPQ SI, (SP)
  8652. JL match_dst_size_check_encodeBlockAsm10BAvx
  8653. MOVQ $0x00000000, ret+48(FP)
  8654. RET
  8655. match_dst_size_check_encodeBlockAsm10BAvx:
  8656. MOVL CX, SI
  8657. MOVL 12(SP), DI
  8658. CMPL DI, SI
  8659. JEQ emit_literal_done_match_emit_encodeBlockAsm10BAvx
  8660. MOVL SI, R8
  8661. MOVL SI, 12(SP)
  8662. LEAQ (DX)(DI*1), SI
  8663. SUBL DI, R8
  8664. MOVL R8, DI
  8665. SUBL $0x01, DI
  8666. JC emit_literal_done_match_emit_encodeBlockAsm10BAvx
  8667. CMPL DI, $0x3c
  8668. JLT one_byte_match_emit_encodeBlockAsm10BAvx
  8669. CMPL DI, $0x00000100
  8670. JLT two_bytes_match_emit_encodeBlockAsm10BAvx
  8671. CMPL DI, $0x00010000
  8672. JLT three_bytes_match_emit_encodeBlockAsm10BAvx
  8673. CMPL DI, $0x01000000
  8674. JLT four_bytes_match_emit_encodeBlockAsm10BAvx
  8675. MOVB $0xfc, (AX)
  8676. MOVL DI, 1(AX)
  8677. ADDQ $0x05, AX
  8678. JMP memmove_match_emit_encodeBlockAsm10BAvx
  8679. four_bytes_match_emit_encodeBlockAsm10BAvx:
  8680. MOVL DI, R9
  8681. SHRL $0x10, R9
  8682. MOVB $0xf8, (AX)
  8683. MOVW DI, 1(AX)
  8684. MOVB R9, 3(AX)
  8685. ADDQ $0x04, AX
  8686. JMP memmove_match_emit_encodeBlockAsm10BAvx
  8687. three_bytes_match_emit_encodeBlockAsm10BAvx:
  8688. MOVB $0xf4, (AX)
  8689. MOVW DI, 1(AX)
  8690. ADDQ $0x03, AX
  8691. JMP memmove_match_emit_encodeBlockAsm10BAvx
  8692. two_bytes_match_emit_encodeBlockAsm10BAvx:
  8693. MOVB $0xf0, (AX)
  8694. MOVB DI, 1(AX)
  8695. ADDQ $0x02, AX
  8696. JMP memmove_match_emit_encodeBlockAsm10BAvx
  8697. one_byte_match_emit_encodeBlockAsm10BAvx:
  8698. SHLB $0x02, DI
  8699. MOVB DI, (AX)
  8700. ADDQ $0x01, AX
  8701. memmove_match_emit_encodeBlockAsm10BAvx:
  8702. LEAQ (AX)(R8*1), DI
  8703. NOP
  8704. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_tail:
  8705. TESTQ R8, R8
  8706. JEQ memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8707. CMPQ R8, $0x02
  8708. JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_1or2
  8709. CMPQ R8, $0x04
  8710. JB emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_3
  8711. JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_4
  8712. CMPQ R8, $0x08
  8713. JB emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_5through7
  8714. JE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_8
  8715. CMPQ R8, $0x10
  8716. JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_9through16
  8717. CMPQ R8, $0x20
  8718. JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_17through32
  8719. CMPQ R8, $0x40
  8720. JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_33through64
  8721. CMPQ R8, $0x80
  8722. JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_65through128
  8723. CMPQ R8, $0x00000100
  8724. JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_129through256
  8725. JMP emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_avxUnaligned
  8726. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_1or2:
  8727. MOVB (SI), R9
  8728. MOVB -1(SI)(R8*1), R10
  8729. MOVB R9, (AX)
  8730. MOVB R10, -1(AX)(R8*1)
  8731. JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8732. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_4:
  8733. MOVL (SI), R9
  8734. MOVL R9, (AX)
  8735. JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8736. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_3:
  8737. MOVW (SI), R9
  8738. MOVB 2(SI), R10
  8739. MOVW R9, (AX)
  8740. MOVB R10, 2(AX)
  8741. JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8742. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_5through7:
  8743. MOVL (SI), R9
  8744. MOVL -4(SI)(R8*1), R10
  8745. MOVL R9, (AX)
  8746. MOVL R10, -4(AX)(R8*1)
  8747. JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8748. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_8:
  8749. MOVQ (SI), R9
  8750. MOVQ R9, (AX)
  8751. JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8752. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_9through16:
  8753. MOVQ (SI), R9
  8754. MOVQ -8(SI)(R8*1), R10
  8755. MOVQ R9, (AX)
  8756. MOVQ R10, -8(AX)(R8*1)
  8757. JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8758. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_17through32:
  8759. MOVOU (SI), X0
  8760. MOVOU -16(SI)(R8*1), X1
  8761. MOVOU X0, (AX)
  8762. MOVOU X1, -16(AX)(R8*1)
  8763. JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8764. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_33through64:
  8765. MOVOU (SI), X0
  8766. MOVOU 16(SI), X1
  8767. MOVOU -32(SI)(R8*1), X2
  8768. MOVOU -16(SI)(R8*1), X3
  8769. MOVOU X0, (AX)
  8770. MOVOU X1, 16(AX)
  8771. MOVOU X2, -32(AX)(R8*1)
  8772. MOVOU X3, -16(AX)(R8*1)
  8773. JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8774. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_65through128:
  8775. MOVOU (SI), X0
  8776. MOVOU 16(SI), X1
  8777. MOVOU 32(SI), X2
  8778. MOVOU 48(SI), X3
  8779. MOVOU -64(SI)(R8*1), X12
  8780. MOVOU -48(SI)(R8*1), X13
  8781. MOVOU -32(SI)(R8*1), X14
  8782. MOVOU -16(SI)(R8*1), X15
  8783. MOVOU X0, (AX)
  8784. MOVOU X1, 16(AX)
  8785. MOVOU X2, 32(AX)
  8786. MOVOU X3, 48(AX)
  8787. MOVOU X12, -64(AX)(R8*1)
  8788. MOVOU X13, -48(AX)(R8*1)
  8789. MOVOU X14, -32(AX)(R8*1)
  8790. MOVOU X15, -16(AX)(R8*1)
  8791. JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8792. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_129through256:
  8793. MOVOU (SI), X0
  8794. MOVOU 16(SI), X1
  8795. MOVOU 32(SI), X2
  8796. MOVOU 48(SI), X3
  8797. MOVOU 64(SI), X4
  8798. MOVOU 80(SI), X5
  8799. MOVOU 96(SI), X6
  8800. MOVOU 112(SI), X7
  8801. MOVOU -128(SI)(R8*1), X8
  8802. MOVOU -112(SI)(R8*1), X9
  8803. MOVOU -96(SI)(R8*1), X10
  8804. MOVOU -80(SI)(R8*1), X11
  8805. MOVOU -64(SI)(R8*1), X12
  8806. MOVOU -48(SI)(R8*1), X13
  8807. MOVOU -32(SI)(R8*1), X14
  8808. MOVOU -16(SI)(R8*1), X15
  8809. MOVOU X0, (AX)
  8810. MOVOU X1, 16(AX)
  8811. MOVOU X2, 32(AX)
  8812. MOVOU X3, 48(AX)
  8813. MOVOU X4, 64(AX)
  8814. MOVOU X5, 80(AX)
  8815. MOVOU X6, 96(AX)
  8816. MOVOU X7, 112(AX)
  8817. MOVOU X8, -128(AX)(R8*1)
  8818. MOVOU X9, -112(AX)(R8*1)
  8819. MOVOU X10, -96(AX)(R8*1)
  8820. MOVOU X11, -80(AX)(R8*1)
  8821. MOVOU X12, -64(AX)(R8*1)
  8822. MOVOU X13, -48(AX)(R8*1)
  8823. MOVOU X14, -32(AX)(R8*1)
  8824. MOVOU X15, -16(AX)(R8*1)
  8825. JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx
  8826. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_256through2048:
  8827. LEAQ -256(R8), R8
  8828. MOVOU (SI), X0
  8829. MOVOU 16(SI), X1
  8830. MOVOU 32(SI), X2
  8831. MOVOU 48(SI), X3
  8832. MOVOU 64(SI), X4
  8833. MOVOU 80(SI), X5
  8834. MOVOU 96(SI), X6
  8835. MOVOU 112(SI), X7
  8836. MOVOU 128(SI), X8
  8837. MOVOU 144(SI), X9
  8838. MOVOU 160(SI), X10
  8839. MOVOU 176(SI), X11
  8840. MOVOU 192(SI), X12
  8841. MOVOU 208(SI), X13
  8842. MOVOU 224(SI), X14
  8843. MOVOU 240(SI), X15
  8844. MOVOU X0, (AX)
  8845. MOVOU X1, 16(AX)
  8846. MOVOU X2, 32(AX)
  8847. MOVOU X3, 48(AX)
  8848. MOVOU X4, 64(AX)
  8849. MOVOU X5, 80(AX)
  8850. MOVOU X6, 96(AX)
  8851. MOVOU X7, 112(AX)
  8852. MOVOU X8, 128(AX)
  8853. MOVOU X9, 144(AX)
  8854. MOVOU X10, 160(AX)
  8855. MOVOU X11, 176(AX)
  8856. MOVOU X12, 192(AX)
  8857. MOVOU X13, 208(AX)
  8858. MOVOU X14, 224(AX)
  8859. MOVOU X15, 240(AX)
  8860. CMPQ R8, $0x00000100
  8861. LEAQ 256(SI), SI
  8862. LEAQ 256(AX), AX
  8863. JGE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_256through2048
  8864. JMP emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_tail
  8865. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_avxUnaligned:
  8866. LEAQ (SI)(R8*1), R10
  8867. MOVQ AX, R12
  8868. MOVOU -128(R10), X5
  8869. MOVOU -112(R10), X6
  8870. MOVQ $0x00000080, R9
  8871. ANDQ $0xffffffe0, AX
  8872. ADDQ $0x20, AX
  8873. MOVOU -96(R10), X7
  8874. MOVOU -80(R10), X8
  8875. MOVQ AX, R11
  8876. SUBQ R12, R11
  8877. MOVOU -64(R10), X9
  8878. MOVOU -48(R10), X10
  8879. SUBQ R11, R8
  8880. MOVOU -32(R10), X11
  8881. MOVOU -16(R10), X12
  8882. VMOVDQU (SI), Y4
  8883. ADDQ R11, SI
  8884. SUBQ R9, R8
  8885. emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_gobble_128_loop:
  8886. VMOVDQU (SI), Y0
  8887. VMOVDQU 32(SI), Y1
  8888. VMOVDQU 64(SI), Y2
  8889. VMOVDQU 96(SI), Y3
  8890. ADDQ R9, SI
  8891. VMOVDQA Y0, (AX)
  8892. VMOVDQA Y1, 32(AX)
  8893. VMOVDQA Y2, 64(AX)
  8894. VMOVDQA Y3, 96(AX)
  8895. ADDQ R9, AX
  8896. SUBQ R9, R8
  8897. JA emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_gobble_128_loop
  8898. ADDQ R9, R8
  8899. ADDQ AX, R8
  8900. VMOVDQU Y4, (R12)
  8901. VZEROUPPER
  8902. MOVOU X5, -128(R8)
  8903. MOVOU X6, -112(R8)
  8904. MOVOU X7, -96(R8)
  8905. MOVOU X8, -80(R8)
  8906. MOVOU X9, -64(R8)
  8907. MOVOU X10, -48(R8)
  8908. MOVOU X11, -32(R8)
  8909. MOVOU X12, -16(R8)
  8910. memmove_end_copy_match_emit_encodeBlockAsm10BAvx:
  8911. MOVQ DI, AX
  8912. emit_literal_done_match_emit_encodeBlockAsm10BAvx:
  8913. match_nolit_loop_encodeBlockAsm10BAvx:
  8914. MOVL CX, SI
  8915. SUBL BP, SI
  8916. MOVL SI, 16(SP)
  8917. ADDL $0x04, CX
  8918. ADDL $0x04, BP
  8919. MOVQ src_len+32(FP), SI
  8920. SUBL CX, SI
  8921. LEAQ (DX)(CX*1), DI
  8922. LEAQ (DX)(BP*1), BP
  8923. XORL R9, R9
  8924. CMPL SI, $0x08
  8925. JL matchlen_single_match_nolit_encodeBlockAsm10BAvx
  8926. matchlen_loopback_match_nolit_encodeBlockAsm10BAvx:
  8927. MOVQ (DI)(R9*1), R8
  8928. XORQ (BP)(R9*1), R8
  8929. TESTQ R8, R8
  8930. JZ matchlen_loop_match_nolit_encodeBlockAsm10BAvx
  8931. BSFQ R8, R8
  8932. SARQ $0x03, R8
  8933. LEAL (R9)(R8*1), R9
  8934. JMP match_nolit_end_encodeBlockAsm10BAvx
  8935. matchlen_loop_match_nolit_encodeBlockAsm10BAvx:
  8936. LEAL -8(SI), SI
  8937. LEAL 8(R9), R9
  8938. CMPL SI, $0x08
  8939. JGE matchlen_loopback_match_nolit_encodeBlockAsm10BAvx
  8940. matchlen_single_match_nolit_encodeBlockAsm10BAvx:
  8941. TESTL SI, SI
  8942. JZ match_nolit_end_encodeBlockAsm10BAvx
  8943. matchlen_single_loopback_match_nolit_encodeBlockAsm10BAvx:
  8944. MOVB (DI)(R9*1), R8
  8945. CMPB (BP)(R9*1), R8
  8946. JNE match_nolit_end_encodeBlockAsm10BAvx
  8947. LEAL 1(R9), R9
  8948. DECL SI
  8949. JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm10BAvx
  8950. match_nolit_end_encodeBlockAsm10BAvx:
  8951. ADDL R9, CX
  8952. MOVL 16(SP), BP
  8953. ADDL $0x04, R9
  8954. CMPL BP, $0x00010000
  8955. JL two_byte_offset_match_nolit_encodeBlockAsm10BAvx
  8956. four_bytes_loop_back_match_nolit_encodeBlockAsm10BAvx:
  8957. CMPL R9, $0x40
  8958. JLE four_bytes_remain_match_nolit_encodeBlockAsm10BAvx
  8959. MOVB $0xff, (AX)
  8960. MOVL BP, 1(AX)
  8961. LEAL -64(R9), R9
  8962. ADDQ $0x05, AX
  8963. CMPL R9, $0x04
  8964. JL four_bytes_remain_match_nolit_encodeBlockAsm10BAvx
  8965. emit_repeat_again_match_nolit_encodeBlockAsm10BAvx_emit_copy:
  8966. MOVL R9, SI
  8967. LEAL -4(R9), R9
  8968. CMPL SI, $0x08
  8969. JLE repeat_two_match_nolit_encodeBlockAsm10BAvx_emit_copy
  8970. CMPL SI, $0x0c
  8971. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy
  8972. CMPL BP, $0x00000800
  8973. JLT repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy
  8974. cant_repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy:
  8975. CMPL R9, $0x00000104
  8976. JLT repeat_three_match_nolit_encodeBlockAsm10BAvx_emit_copy
  8977. CMPL R9, $0x00010100
  8978. JLT repeat_four_match_nolit_encodeBlockAsm10BAvx_emit_copy
  8979. CMPL R9, $0x0100ffff
  8980. JLT repeat_five_match_nolit_encodeBlockAsm10BAvx_emit_copy
  8981. LEAL -16842747(R9), R9
  8982. MOVW $0x001d, (AX)
  8983. MOVW $0xfffb, 2(AX)
  8984. MOVB $0xff, 4(AX)
  8985. ADDQ $0x05, AX
  8986. JMP emit_repeat_again_match_nolit_encodeBlockAsm10BAvx_emit_copy
  8987. repeat_five_match_nolit_encodeBlockAsm10BAvx_emit_copy:
  8988. LEAL -65536(R9), R9
  8989. MOVL R9, BP
  8990. MOVW $0x001d, (AX)
  8991. MOVW R9, 2(AX)
  8992. SARL $0x10, BP
  8993. MOVB BP, 4(AX)
  8994. ADDQ $0x05, AX
  8995. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  8996. repeat_four_match_nolit_encodeBlockAsm10BAvx_emit_copy:
  8997. LEAL -256(R9), R9
  8998. MOVW $0x0019, (AX)
  8999. MOVW R9, 2(AX)
  9000. ADDQ $0x04, AX
  9001. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9002. repeat_three_match_nolit_encodeBlockAsm10BAvx_emit_copy:
  9003. LEAL -4(R9), R9
  9004. MOVW $0x0015, (AX)
  9005. MOVB R9, 2(AX)
  9006. ADDQ $0x03, AX
  9007. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9008. repeat_two_match_nolit_encodeBlockAsm10BAvx_emit_copy:
  9009. SHLL $0x02, R9
  9010. ORL $0x01, R9
  9011. MOVW R9, (AX)
  9012. ADDQ $0x02, AX
  9013. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9014. repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy:
  9015. XORQ SI, SI
  9016. LEAL 1(SI)(R9*4), R9
  9017. MOVB BP, 1(AX)
  9018. SARL $0x08, BP
  9019. SHLL $0x05, BP
  9020. ORL BP, R9
  9021. MOVB R9, (AX)
  9022. ADDQ $0x02, AX
  9023. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9024. JMP four_bytes_loop_back_match_nolit_encodeBlockAsm10BAvx
  9025. four_bytes_remain_match_nolit_encodeBlockAsm10BAvx:
  9026. TESTL R9, R9
  9027. JZ match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9028. MOVB $0x03, BL
  9029. LEAL -4(BX)(R9*4), R9
  9030. MOVB R9, (AX)
  9031. MOVL BP, 1(AX)
  9032. ADDQ $0x05, AX
  9033. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9034. two_byte_offset_match_nolit_encodeBlockAsm10BAvx:
  9035. CMPL R9, $0x40
  9036. JLE two_byte_offset_short_match_nolit_encodeBlockAsm10BAvx
  9037. MOVB $0xee, (AX)
  9038. MOVW BP, 1(AX)
  9039. LEAL -60(R9), R9
  9040. ADDQ $0x03, AX
  9041. emit_repeat_again_match_nolit_encodeBlockAsm10BAvx_emit_copy_short:
  9042. MOVL R9, SI
  9043. LEAL -4(R9), R9
  9044. CMPL SI, $0x08
  9045. JLE repeat_two_match_nolit_encodeBlockAsm10BAvx_emit_copy_short
  9046. CMPL SI, $0x0c
  9047. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy_short
  9048. CMPL BP, $0x00000800
  9049. JLT repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy_short
  9050. cant_repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy_short:
  9051. CMPL R9, $0x00000104
  9052. JLT repeat_three_match_nolit_encodeBlockAsm10BAvx_emit_copy_short
  9053. CMPL R9, $0x00010100
  9054. JLT repeat_four_match_nolit_encodeBlockAsm10BAvx_emit_copy_short
  9055. CMPL R9, $0x0100ffff
  9056. JLT repeat_five_match_nolit_encodeBlockAsm10BAvx_emit_copy_short
  9057. LEAL -16842747(R9), R9
  9058. MOVW $0x001d, (AX)
  9059. MOVW $0xfffb, 2(AX)
  9060. MOVB $0xff, 4(AX)
  9061. ADDQ $0x05, AX
  9062. JMP emit_repeat_again_match_nolit_encodeBlockAsm10BAvx_emit_copy_short
  9063. repeat_five_match_nolit_encodeBlockAsm10BAvx_emit_copy_short:
  9064. LEAL -65536(R9), R9
  9065. MOVL R9, BP
  9066. MOVW $0x001d, (AX)
  9067. MOVW R9, 2(AX)
  9068. SARL $0x10, BP
  9069. MOVB BP, 4(AX)
  9070. ADDQ $0x05, AX
  9071. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9072. repeat_four_match_nolit_encodeBlockAsm10BAvx_emit_copy_short:
  9073. LEAL -256(R9), R9
  9074. MOVW $0x0019, (AX)
  9075. MOVW R9, 2(AX)
  9076. ADDQ $0x04, AX
  9077. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9078. repeat_three_match_nolit_encodeBlockAsm10BAvx_emit_copy_short:
  9079. LEAL -4(R9), R9
  9080. MOVW $0x0015, (AX)
  9081. MOVB R9, 2(AX)
  9082. ADDQ $0x03, AX
  9083. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9084. repeat_two_match_nolit_encodeBlockAsm10BAvx_emit_copy_short:
  9085. SHLL $0x02, R9
  9086. ORL $0x01, R9
  9087. MOVW R9, (AX)
  9088. ADDQ $0x02, AX
  9089. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9090. repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy_short:
  9091. XORQ SI, SI
  9092. LEAL 1(SI)(R9*4), R9
  9093. MOVB BP, 1(AX)
  9094. SARL $0x08, BP
  9095. SHLL $0x05, BP
  9096. ORL BP, R9
  9097. MOVB R9, (AX)
  9098. ADDQ $0x02, AX
  9099. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9100. JMP two_byte_offset_match_nolit_encodeBlockAsm10BAvx
  9101. two_byte_offset_short_match_nolit_encodeBlockAsm10BAvx:
  9102. CMPL R9, $0x0c
  9103. JGE emit_copy_three_match_nolit_encodeBlockAsm10BAvx
  9104. CMPL BP, $0x00000800
  9105. JGE emit_copy_three_match_nolit_encodeBlockAsm10BAvx
  9106. MOVB $0x01, BL
  9107. LEAL -16(BX)(R9*4), R9
  9108. MOVB BP, 1(AX)
  9109. SHRL $0x08, BP
  9110. SHLL $0x05, BP
  9111. ORL BP, R9
  9112. MOVB R9, (AX)
  9113. ADDQ $0x02, AX
  9114. JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx
  9115. emit_copy_three_match_nolit_encodeBlockAsm10BAvx:
  9116. MOVB $0x02, BL
  9117. LEAL -4(BX)(R9*4), R9
  9118. MOVB R9, (AX)
  9119. MOVW BP, 1(AX)
  9120. ADDQ $0x03, AX
  9121. match_nolit_emitcopy_end_encodeBlockAsm10BAvx:
  9122. MOVL CX, 12(SP)
  9123. CMPL CX, 8(SP)
  9124. JGE emit_remainder_encodeBlockAsm10BAvx
  9125. CMPQ AX, (SP)
  9126. JL match_nolit_dst_ok_encodeBlockAsm10BAvx
  9127. MOVQ $0x00000000, ret+48(FP)
  9128. RET
  9129. match_nolit_dst_ok_encodeBlockAsm10BAvx:
  9130. MOVQ -2(DX)(CX*1), SI
  9131. MOVQ $0x9e3779b1, BP
  9132. MOVQ SI, DI
  9133. SHRQ $0x10, SI
  9134. MOVQ SI, R8
  9135. SHLQ $0x20, DI
  9136. IMULQ BP, DI
  9137. SHRQ $0x36, DI
  9138. SHLQ $0x20, R8
  9139. IMULQ BP, R8
  9140. SHRQ $0x36, R8
  9141. LEAL -2(CX), R9
  9142. MOVL 24(SP)(R8*4), BP
  9143. MOVL R9, 24(SP)(DI*4)
  9144. MOVL CX, 24(SP)(R8*4)
  9145. CMPL (DX)(BP*1), SI
  9146. JEQ match_nolit_loop_encodeBlockAsm10BAvx
  9147. INCL CX
  9148. JMP search_loop_encodeBlockAsm10BAvx
  9149. emit_remainder_encodeBlockAsm10BAvx:
  9150. MOVQ src_len+32(FP), CX
  9151. SUBL 12(SP), CX
  9152. LEAQ 4(AX)(CX*1), CX
  9153. CMPQ CX, (SP)
  9154. JL emit_remainder_ok_encodeBlockAsm10BAvx
  9155. MOVQ $0x00000000, ret+48(FP)
  9156. RET
  9157. emit_remainder_ok_encodeBlockAsm10BAvx:
  9158. MOVQ src_len+32(FP), CX
  9159. MOVL 12(SP), BX
  9160. CMPL BX, CX
  9161. JEQ emit_literal_done_emit_remainder_encodeBlockAsm10BAvx
  9162. MOVL CX, BP
  9163. MOVL CX, 12(SP)
  9164. LEAQ (DX)(BX*1), CX
  9165. SUBL BX, BP
  9166. MOVL BP, DX
  9167. SUBL $0x01, DX
  9168. JC emit_literal_done_emit_remainder_encodeBlockAsm10BAvx
  9169. CMPL DX, $0x3c
  9170. JLT one_byte_emit_remainder_encodeBlockAsm10BAvx
  9171. CMPL DX, $0x00000100
  9172. JLT two_bytes_emit_remainder_encodeBlockAsm10BAvx
  9173. CMPL DX, $0x00010000
  9174. JLT three_bytes_emit_remainder_encodeBlockAsm10BAvx
  9175. CMPL DX, $0x01000000
  9176. JLT four_bytes_emit_remainder_encodeBlockAsm10BAvx
  9177. MOVB $0xfc, (AX)
  9178. MOVL DX, 1(AX)
  9179. ADDQ $0x05, AX
  9180. JMP memmove_emit_remainder_encodeBlockAsm10BAvx
  9181. four_bytes_emit_remainder_encodeBlockAsm10BAvx:
  9182. MOVL DX, BX
  9183. SHRL $0x10, BX
  9184. MOVB $0xf8, (AX)
  9185. MOVW DX, 1(AX)
  9186. MOVB BL, 3(AX)
  9187. ADDQ $0x04, AX
  9188. JMP memmove_emit_remainder_encodeBlockAsm10BAvx
  9189. three_bytes_emit_remainder_encodeBlockAsm10BAvx:
  9190. MOVB $0xf4, (AX)
  9191. MOVW DX, 1(AX)
  9192. ADDQ $0x03, AX
  9193. JMP memmove_emit_remainder_encodeBlockAsm10BAvx
  9194. two_bytes_emit_remainder_encodeBlockAsm10BAvx:
  9195. MOVB $0xf0, (AX)
  9196. MOVB DL, 1(AX)
  9197. ADDQ $0x02, AX
  9198. JMP memmove_emit_remainder_encodeBlockAsm10BAvx
  9199. one_byte_emit_remainder_encodeBlockAsm10BAvx:
  9200. SHLB $0x02, DL
  9201. MOVB DL, (AX)
  9202. ADDQ $0x01, AX
  9203. memmove_emit_remainder_encodeBlockAsm10BAvx:
  9204. LEAQ (AX)(BP*1), DX
  9205. MOVL BP, BX
  9206. NOP
  9207. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_tail:
  9208. TESTQ BX, BX
  9209. JEQ memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9210. CMPQ BX, $0x02
  9211. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_1or2
  9212. CMPQ BX, $0x04
  9213. JB emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_3
  9214. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_4
  9215. CMPQ BX, $0x08
  9216. JB emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_5through7
  9217. JE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_8
  9218. CMPQ BX, $0x10
  9219. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_9through16
  9220. CMPQ BX, $0x20
  9221. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_17through32
  9222. CMPQ BX, $0x40
  9223. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_33through64
  9224. CMPQ BX, $0x80
  9225. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_65through128
  9226. CMPQ BX, $0x00000100
  9227. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_129through256
  9228. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_avxUnaligned
  9229. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_1or2:
  9230. MOVB (CX), BP
  9231. MOVB -1(CX)(BX*1), SI
  9232. MOVB BP, (AX)
  9233. MOVB SI, -1(AX)(BX*1)
  9234. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9235. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_4:
  9236. MOVL (CX), BP
  9237. MOVL BP, (AX)
  9238. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9239. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_3:
  9240. MOVW (CX), BP
  9241. MOVB 2(CX), SI
  9242. MOVW BP, (AX)
  9243. MOVB SI, 2(AX)
  9244. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9245. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_5through7:
  9246. MOVL (CX), BP
  9247. MOVL -4(CX)(BX*1), SI
  9248. MOVL BP, (AX)
  9249. MOVL SI, -4(AX)(BX*1)
  9250. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9251. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_8:
  9252. MOVQ (CX), BP
  9253. MOVQ BP, (AX)
  9254. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9255. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_9through16:
  9256. MOVQ (CX), BP
  9257. MOVQ -8(CX)(BX*1), SI
  9258. MOVQ BP, (AX)
  9259. MOVQ SI, -8(AX)(BX*1)
  9260. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9261. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_17through32:
  9262. MOVOU (CX), X0
  9263. MOVOU -16(CX)(BX*1), X1
  9264. MOVOU X0, (AX)
  9265. MOVOU X1, -16(AX)(BX*1)
  9266. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9267. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_33through64:
  9268. MOVOU (CX), X0
  9269. MOVOU 16(CX), X1
  9270. MOVOU -32(CX)(BX*1), X2
  9271. MOVOU -16(CX)(BX*1), X3
  9272. MOVOU X0, (AX)
  9273. MOVOU X1, 16(AX)
  9274. MOVOU X2, -32(AX)(BX*1)
  9275. MOVOU X3, -16(AX)(BX*1)
  9276. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9277. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_65through128:
  9278. MOVOU (CX), X0
  9279. MOVOU 16(CX), X1
  9280. MOVOU 32(CX), X2
  9281. MOVOU 48(CX), X3
  9282. MOVOU -64(CX)(BX*1), X12
  9283. MOVOU -48(CX)(BX*1), X13
  9284. MOVOU -32(CX)(BX*1), X14
  9285. MOVOU -16(CX)(BX*1), X15
  9286. MOVOU X0, (AX)
  9287. MOVOU X1, 16(AX)
  9288. MOVOU X2, 32(AX)
  9289. MOVOU X3, 48(AX)
  9290. MOVOU X12, -64(AX)(BX*1)
  9291. MOVOU X13, -48(AX)(BX*1)
  9292. MOVOU X14, -32(AX)(BX*1)
  9293. MOVOU X15, -16(AX)(BX*1)
  9294. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9295. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_129through256:
  9296. MOVOU (CX), X0
  9297. MOVOU 16(CX), X1
  9298. MOVOU 32(CX), X2
  9299. MOVOU 48(CX), X3
  9300. MOVOU 64(CX), X4
  9301. MOVOU 80(CX), X5
  9302. MOVOU 96(CX), X6
  9303. MOVOU 112(CX), X7
  9304. MOVOU -128(CX)(BX*1), X8
  9305. MOVOU -112(CX)(BX*1), X9
  9306. MOVOU -96(CX)(BX*1), X10
  9307. MOVOU -80(CX)(BX*1), X11
  9308. MOVOU -64(CX)(BX*1), X12
  9309. MOVOU -48(CX)(BX*1), X13
  9310. MOVOU -32(CX)(BX*1), X14
  9311. MOVOU -16(CX)(BX*1), X15
  9312. MOVOU X0, (AX)
  9313. MOVOU X1, 16(AX)
  9314. MOVOU X2, 32(AX)
  9315. MOVOU X3, 48(AX)
  9316. MOVOU X4, 64(AX)
  9317. MOVOU X5, 80(AX)
  9318. MOVOU X6, 96(AX)
  9319. MOVOU X7, 112(AX)
  9320. MOVOU X8, -128(AX)(BX*1)
  9321. MOVOU X9, -112(AX)(BX*1)
  9322. MOVOU X10, -96(AX)(BX*1)
  9323. MOVOU X11, -80(AX)(BX*1)
  9324. MOVOU X12, -64(AX)(BX*1)
  9325. MOVOU X13, -48(AX)(BX*1)
  9326. MOVOU X14, -32(AX)(BX*1)
  9327. MOVOU X15, -16(AX)(BX*1)
  9328. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx
  9329. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_256through2048:
  9330. LEAQ -256(BX), BX
  9331. MOVOU (CX), X0
  9332. MOVOU 16(CX), X1
  9333. MOVOU 32(CX), X2
  9334. MOVOU 48(CX), X3
  9335. MOVOU 64(CX), X4
  9336. MOVOU 80(CX), X5
  9337. MOVOU 96(CX), X6
  9338. MOVOU 112(CX), X7
  9339. MOVOU 128(CX), X8
  9340. MOVOU 144(CX), X9
  9341. MOVOU 160(CX), X10
  9342. MOVOU 176(CX), X11
  9343. MOVOU 192(CX), X12
  9344. MOVOU 208(CX), X13
  9345. MOVOU 224(CX), X14
  9346. MOVOU 240(CX), X15
  9347. MOVOU X0, (AX)
  9348. MOVOU X1, 16(AX)
  9349. MOVOU X2, 32(AX)
  9350. MOVOU X3, 48(AX)
  9351. MOVOU X4, 64(AX)
  9352. MOVOU X5, 80(AX)
  9353. MOVOU X6, 96(AX)
  9354. MOVOU X7, 112(AX)
  9355. MOVOU X8, 128(AX)
  9356. MOVOU X9, 144(AX)
  9357. MOVOU X10, 160(AX)
  9358. MOVOU X11, 176(AX)
  9359. MOVOU X12, 192(AX)
  9360. MOVOU X13, 208(AX)
  9361. MOVOU X14, 224(AX)
  9362. MOVOU X15, 240(AX)
  9363. CMPQ BX, $0x00000100
  9364. LEAQ 256(CX), CX
  9365. LEAQ 256(AX), AX
  9366. JGE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_256through2048
  9367. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_tail
  9368. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_avxUnaligned:
  9369. LEAQ (CX)(BX*1), SI
  9370. MOVQ AX, R8
  9371. MOVOU -128(SI), X5
  9372. MOVOU -112(SI), X6
  9373. MOVQ $0x00000080, BP
  9374. ANDQ $0xffffffe0, AX
  9375. ADDQ $0x20, AX
  9376. MOVOU -96(SI), X7
  9377. MOVOU -80(SI), X8
  9378. MOVQ AX, DI
  9379. SUBQ R8, DI
  9380. MOVOU -64(SI), X9
  9381. MOVOU -48(SI), X10
  9382. SUBQ DI, BX
  9383. MOVOU -32(SI), X11
  9384. MOVOU -16(SI), X12
  9385. VMOVDQU (CX), Y4
  9386. ADDQ DI, CX
  9387. SUBQ BP, BX
  9388. emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_gobble_128_loop:
  9389. VMOVDQU (CX), Y0
  9390. VMOVDQU 32(CX), Y1
  9391. VMOVDQU 64(CX), Y2
  9392. VMOVDQU 96(CX), Y3
  9393. ADDQ BP, CX
  9394. VMOVDQA Y0, (AX)
  9395. VMOVDQA Y1, 32(AX)
  9396. VMOVDQA Y2, 64(AX)
  9397. VMOVDQA Y3, 96(AX)
  9398. ADDQ BP, AX
  9399. SUBQ BP, BX
  9400. JA emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_gobble_128_loop
  9401. ADDQ BP, BX
  9402. ADDQ AX, BX
  9403. VMOVDQU Y4, (R8)
  9404. VZEROUPPER
  9405. MOVOU X5, -128(BX)
  9406. MOVOU X6, -112(BX)
  9407. MOVOU X7, -96(BX)
  9408. MOVOU X8, -80(BX)
  9409. MOVOU X9, -64(BX)
  9410. MOVOU X10, -48(BX)
  9411. MOVOU X11, -32(BX)
  9412. MOVOU X12, -16(BX)
  9413. memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx:
  9414. MOVQ DX, AX
  9415. emit_literal_done_emit_remainder_encodeBlockAsm10BAvx:
  9416. MOVQ dst_base+0(FP), CX
  9417. SUBQ CX, AX
  9418. MOVQ AX, ret+48(FP)
  9419. RET
  9420. // func encodeBlockAsm8BAvx(dst []byte, src []byte) int
  9421. // Requires: AVX, SSE2
  9422. TEXT ·encodeBlockAsm8BAvx(SB), $1048-56
  9423. MOVQ dst_base+0(FP), AX
  9424. MOVQ $0x00000008, CX
  9425. LEAQ 24(SP), DX
  9426. PXOR X0, X0
  9427. zero_loop_encodeBlockAsm8BAvx:
  9428. MOVOU X0, (DX)
  9429. MOVOU X0, 16(DX)
  9430. MOVOU X0, 32(DX)
  9431. MOVOU X0, 48(DX)
  9432. MOVOU X0, 64(DX)
  9433. MOVOU X0, 80(DX)
  9434. MOVOU X0, 96(DX)
  9435. MOVOU X0, 112(DX)
  9436. ADDQ $0x80, DX
  9437. DECQ CX
  9438. JNZ zero_loop_encodeBlockAsm8BAvx
  9439. MOVL $0x00000000, 12(SP)
  9440. MOVQ src_len+32(FP), CX
  9441. LEAQ -5(CX), DX
  9442. LEAQ -8(CX), BP
  9443. MOVL BP, 8(SP)
  9444. SHRQ $0x05, CX
  9445. SUBL CX, DX
  9446. LEAQ (AX)(DX*1), DX
  9447. MOVQ DX, (SP)
  9448. MOVL $0x00000001, CX
  9449. MOVL CX, 16(SP)
  9450. MOVQ src_base+24(FP), DX
  9451. search_loop_encodeBlockAsm8BAvx:
  9452. MOVQ (DX)(CX*1), SI
  9453. MOVL CX, BP
  9454. SUBL 12(SP), BP
  9455. SHRL $0x04, BP
  9456. LEAL 4(CX)(BP*1), BP
  9457. MOVL 8(SP), DI
  9458. CMPL BP, DI
  9459. JGT emit_remainder_encodeBlockAsm8BAvx
  9460. MOVL BP, 20(SP)
  9461. MOVQ $0x9e3779b1, R8
  9462. MOVQ SI, R9
  9463. MOVQ SI, R10
  9464. SHRQ $0x08, R10
  9465. SHLQ $0x20, R9
  9466. IMULQ R8, R9
  9467. SHRQ $0x38, R9
  9468. SHLQ $0x20, R10
  9469. IMULQ R8, R10
  9470. SHRQ $0x38, R10
  9471. MOVL 24(SP)(R9*4), BP
  9472. MOVL 24(SP)(R10*4), DI
  9473. MOVL CX, 24(SP)(R9*4)
  9474. LEAL 1(CX), R9
  9475. MOVL R9, 24(SP)(R10*4)
  9476. MOVQ SI, R9
  9477. SHRQ $0x10, R9
  9478. SHLQ $0x20, R9
  9479. IMULQ R8, R9
  9480. SHRQ $0x38, R9
  9481. MOVL CX, R8
  9482. SUBL 16(SP), R8
  9483. MOVL 1(DX)(R8*1), R10
  9484. MOVQ SI, R8
  9485. SHRQ $0x08, R8
  9486. CMPL R8, R10
  9487. JNE no_repeat_found_encodeBlockAsm8BAvx
  9488. LEAL 1(CX), SI
  9489. MOVL 12(SP), DI
  9490. MOVL SI, BP
  9491. SUBL 16(SP), BP
  9492. JZ repeat_extend_back_end_encodeBlockAsm8BAvx
  9493. repeat_extend_back_loop_encodeBlockAsm8BAvx:
  9494. CMPL SI, DI
  9495. JLE repeat_extend_back_end_encodeBlockAsm8BAvx
  9496. MOVB -1(DX)(BP*1), BL
  9497. MOVB -1(DX)(SI*1), R8
  9498. CMPB BL, R8
  9499. JNE repeat_extend_back_end_encodeBlockAsm8BAvx
  9500. LEAL -1(SI), SI
  9501. DECL BP
  9502. JNZ repeat_extend_back_loop_encodeBlockAsm8BAvx
  9503. repeat_extend_back_end_encodeBlockAsm8BAvx:
  9504. MOVL 12(SP), BP
  9505. CMPL BP, SI
  9506. JEQ emit_literal_done_repeat_emit_encodeBlockAsm8BAvx
  9507. MOVL SI, R8
  9508. MOVL SI, 12(SP)
  9509. LEAQ (DX)(BP*1), R9
  9510. SUBL BP, R8
  9511. MOVL R8, BP
  9512. SUBL $0x01, BP
  9513. JC emit_literal_done_repeat_emit_encodeBlockAsm8BAvx
  9514. CMPL BP, $0x3c
  9515. JLT one_byte_repeat_emit_encodeBlockAsm8BAvx
  9516. CMPL BP, $0x00000100
  9517. JLT two_bytes_repeat_emit_encodeBlockAsm8BAvx
  9518. CMPL BP, $0x00010000
  9519. JLT three_bytes_repeat_emit_encodeBlockAsm8BAvx
  9520. CMPL BP, $0x01000000
  9521. JLT four_bytes_repeat_emit_encodeBlockAsm8BAvx
  9522. MOVB $0xfc, (AX)
  9523. MOVL BP, 1(AX)
  9524. ADDQ $0x05, AX
  9525. JMP memmove_repeat_emit_encodeBlockAsm8BAvx
  9526. four_bytes_repeat_emit_encodeBlockAsm8BAvx:
  9527. MOVL BP, R10
  9528. SHRL $0x10, R10
  9529. MOVB $0xf8, (AX)
  9530. MOVW BP, 1(AX)
  9531. MOVB R10, 3(AX)
  9532. ADDQ $0x04, AX
  9533. JMP memmove_repeat_emit_encodeBlockAsm8BAvx
  9534. three_bytes_repeat_emit_encodeBlockAsm8BAvx:
  9535. MOVB $0xf4, (AX)
  9536. MOVW BP, 1(AX)
  9537. ADDQ $0x03, AX
  9538. JMP memmove_repeat_emit_encodeBlockAsm8BAvx
  9539. two_bytes_repeat_emit_encodeBlockAsm8BAvx:
  9540. MOVB $0xf0, (AX)
  9541. MOVB BP, 1(AX)
  9542. ADDQ $0x02, AX
  9543. JMP memmove_repeat_emit_encodeBlockAsm8BAvx
  9544. one_byte_repeat_emit_encodeBlockAsm8BAvx:
  9545. SHLB $0x02, BP
  9546. MOVB BP, (AX)
  9547. ADDQ $0x01, AX
  9548. memmove_repeat_emit_encodeBlockAsm8BAvx:
  9549. LEAQ (AX)(R8*1), BP
  9550. NOP
  9551. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_tail:
  9552. TESTQ R8, R8
  9553. JEQ memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9554. CMPQ R8, $0x02
  9555. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_1or2
  9556. CMPQ R8, $0x04
  9557. JB emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_3
  9558. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_4
  9559. CMPQ R8, $0x08
  9560. JB emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_5through7
  9561. JE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_8
  9562. CMPQ R8, $0x10
  9563. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_9through16
  9564. CMPQ R8, $0x20
  9565. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_17through32
  9566. CMPQ R8, $0x40
  9567. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_33through64
  9568. CMPQ R8, $0x80
  9569. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_65through128
  9570. CMPQ R8, $0x00000100
  9571. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_129through256
  9572. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_avxUnaligned
  9573. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_1or2:
  9574. MOVB (R9), R10
  9575. MOVB -1(R9)(R8*1), R11
  9576. MOVB R10, (AX)
  9577. MOVB R11, -1(AX)(R8*1)
  9578. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9579. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_4:
  9580. MOVL (R9), R10
  9581. MOVL R10, (AX)
  9582. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9583. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_3:
  9584. MOVW (R9), R10
  9585. MOVB 2(R9), R11
  9586. MOVW R10, (AX)
  9587. MOVB R11, 2(AX)
  9588. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9589. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_5through7:
  9590. MOVL (R9), R10
  9591. MOVL -4(R9)(R8*1), R11
  9592. MOVL R10, (AX)
  9593. MOVL R11, -4(AX)(R8*1)
  9594. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9595. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_8:
  9596. MOVQ (R9), R10
  9597. MOVQ R10, (AX)
  9598. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9599. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_9through16:
  9600. MOVQ (R9), R10
  9601. MOVQ -8(R9)(R8*1), R11
  9602. MOVQ R10, (AX)
  9603. MOVQ R11, -8(AX)(R8*1)
  9604. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9605. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_17through32:
  9606. MOVOU (R9), X0
  9607. MOVOU -16(R9)(R8*1), X1
  9608. MOVOU X0, (AX)
  9609. MOVOU X1, -16(AX)(R8*1)
  9610. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9611. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_33through64:
  9612. MOVOU (R9), X0
  9613. MOVOU 16(R9), X1
  9614. MOVOU -32(R9)(R8*1), X2
  9615. MOVOU -16(R9)(R8*1), X3
  9616. MOVOU X0, (AX)
  9617. MOVOU X1, 16(AX)
  9618. MOVOU X2, -32(AX)(R8*1)
  9619. MOVOU X3, -16(AX)(R8*1)
  9620. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9621. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_65through128:
  9622. MOVOU (R9), X0
  9623. MOVOU 16(R9), X1
  9624. MOVOU 32(R9), X2
  9625. MOVOU 48(R9), X3
  9626. MOVOU -64(R9)(R8*1), X12
  9627. MOVOU -48(R9)(R8*1), X13
  9628. MOVOU -32(R9)(R8*1), X14
  9629. MOVOU -16(R9)(R8*1), X15
  9630. MOVOU X0, (AX)
  9631. MOVOU X1, 16(AX)
  9632. MOVOU X2, 32(AX)
  9633. MOVOU X3, 48(AX)
  9634. MOVOU X12, -64(AX)(R8*1)
  9635. MOVOU X13, -48(AX)(R8*1)
  9636. MOVOU X14, -32(AX)(R8*1)
  9637. MOVOU X15, -16(AX)(R8*1)
  9638. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9639. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_129through256:
  9640. MOVOU (R9), X0
  9641. MOVOU 16(R9), X1
  9642. MOVOU 32(R9), X2
  9643. MOVOU 48(R9), X3
  9644. MOVOU 64(R9), X4
  9645. MOVOU 80(R9), X5
  9646. MOVOU 96(R9), X6
  9647. MOVOU 112(R9), X7
  9648. MOVOU -128(R9)(R8*1), X8
  9649. MOVOU -112(R9)(R8*1), X9
  9650. MOVOU -96(R9)(R8*1), X10
  9651. MOVOU -80(R9)(R8*1), X11
  9652. MOVOU -64(R9)(R8*1), X12
  9653. MOVOU -48(R9)(R8*1), X13
  9654. MOVOU -32(R9)(R8*1), X14
  9655. MOVOU -16(R9)(R8*1), X15
  9656. MOVOU X0, (AX)
  9657. MOVOU X1, 16(AX)
  9658. MOVOU X2, 32(AX)
  9659. MOVOU X3, 48(AX)
  9660. MOVOU X4, 64(AX)
  9661. MOVOU X5, 80(AX)
  9662. MOVOU X6, 96(AX)
  9663. MOVOU X7, 112(AX)
  9664. MOVOU X8, -128(AX)(R8*1)
  9665. MOVOU X9, -112(AX)(R8*1)
  9666. MOVOU X10, -96(AX)(R8*1)
  9667. MOVOU X11, -80(AX)(R8*1)
  9668. MOVOU X12, -64(AX)(R8*1)
  9669. MOVOU X13, -48(AX)(R8*1)
  9670. MOVOU X14, -32(AX)(R8*1)
  9671. MOVOU X15, -16(AX)(R8*1)
  9672. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx
  9673. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_256through2048:
  9674. LEAQ -256(R8), R8
  9675. MOVOU (R9), X0
  9676. MOVOU 16(R9), X1
  9677. MOVOU 32(R9), X2
  9678. MOVOU 48(R9), X3
  9679. MOVOU 64(R9), X4
  9680. MOVOU 80(R9), X5
  9681. MOVOU 96(R9), X6
  9682. MOVOU 112(R9), X7
  9683. MOVOU 128(R9), X8
  9684. MOVOU 144(R9), X9
  9685. MOVOU 160(R9), X10
  9686. MOVOU 176(R9), X11
  9687. MOVOU 192(R9), X12
  9688. MOVOU 208(R9), X13
  9689. MOVOU 224(R9), X14
  9690. MOVOU 240(R9), X15
  9691. MOVOU X0, (AX)
  9692. MOVOU X1, 16(AX)
  9693. MOVOU X2, 32(AX)
  9694. MOVOU X3, 48(AX)
  9695. MOVOU X4, 64(AX)
  9696. MOVOU X5, 80(AX)
  9697. MOVOU X6, 96(AX)
  9698. MOVOU X7, 112(AX)
  9699. MOVOU X8, 128(AX)
  9700. MOVOU X9, 144(AX)
  9701. MOVOU X10, 160(AX)
  9702. MOVOU X11, 176(AX)
  9703. MOVOU X12, 192(AX)
  9704. MOVOU X13, 208(AX)
  9705. MOVOU X14, 224(AX)
  9706. MOVOU X15, 240(AX)
  9707. CMPQ R8, $0x00000100
  9708. LEAQ 256(R9), R9
  9709. LEAQ 256(AX), AX
  9710. JGE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_256through2048
  9711. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_tail
  9712. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_avxUnaligned:
  9713. LEAQ (R9)(R8*1), R11
  9714. MOVQ AX, R13
  9715. MOVOU -128(R11), X5
  9716. MOVOU -112(R11), X6
  9717. MOVQ $0x00000080, R10
  9718. ANDQ $0xffffffe0, AX
  9719. ADDQ $0x20, AX
  9720. MOVOU -96(R11), X7
  9721. MOVOU -80(R11), X8
  9722. MOVQ AX, R12
  9723. SUBQ R13, R12
  9724. MOVOU -64(R11), X9
  9725. MOVOU -48(R11), X10
  9726. SUBQ R12, R8
  9727. MOVOU -32(R11), X11
  9728. MOVOU -16(R11), X12
  9729. VMOVDQU (R9), Y4
  9730. ADDQ R12, R9
  9731. SUBQ R10, R8
  9732. emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_gobble_128_loop:
  9733. VMOVDQU (R9), Y0
  9734. VMOVDQU 32(R9), Y1
  9735. VMOVDQU 64(R9), Y2
  9736. VMOVDQU 96(R9), Y3
  9737. ADDQ R10, R9
  9738. VMOVDQA Y0, (AX)
  9739. VMOVDQA Y1, 32(AX)
  9740. VMOVDQA Y2, 64(AX)
  9741. VMOVDQA Y3, 96(AX)
  9742. ADDQ R10, AX
  9743. SUBQ R10, R8
  9744. JA emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_gobble_128_loop
  9745. ADDQ R10, R8
  9746. ADDQ AX, R8
  9747. VMOVDQU Y4, (R13)
  9748. VZEROUPPER
  9749. MOVOU X5, -128(R8)
  9750. MOVOU X6, -112(R8)
  9751. MOVOU X7, -96(R8)
  9752. MOVOU X8, -80(R8)
  9753. MOVOU X9, -64(R8)
  9754. MOVOU X10, -48(R8)
  9755. MOVOU X11, -32(R8)
  9756. MOVOU X12, -16(R8)
  9757. memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx:
  9758. MOVQ BP, AX
  9759. emit_literal_done_repeat_emit_encodeBlockAsm8BAvx:
  9760. ADDL $0x05, CX
  9761. MOVL CX, BP
  9762. SUBL 16(SP), BP
  9763. MOVQ src_len+32(FP), R8
  9764. SUBL CX, R8
  9765. LEAQ (DX)(CX*1), R9
  9766. LEAQ (DX)(BP*1), BP
  9767. XORL R11, R11
  9768. CMPL R8, $0x08
  9769. JL matchlen_single_repeat_extend
  9770. matchlen_loopback_repeat_extend:
  9771. MOVQ (R9)(R11*1), R10
  9772. XORQ (BP)(R11*1), R10
  9773. TESTQ R10, R10
  9774. JZ matchlen_loop_repeat_extend
  9775. BSFQ R10, R10
  9776. SARQ $0x03, R10
  9777. LEAL (R11)(R10*1), R11
  9778. JMP repeat_extend_forward_end_encodeBlockAsm8BAvx
  9779. matchlen_loop_repeat_extend:
  9780. LEAL -8(R8), R8
  9781. LEAL 8(R11), R11
  9782. CMPL R8, $0x08
  9783. JGE matchlen_loopback_repeat_extend
  9784. matchlen_single_repeat_extend:
  9785. TESTL R8, R8
  9786. JZ repeat_extend_forward_end_encodeBlockAsm8BAvx
  9787. matchlen_single_loopback_repeat_extend:
  9788. MOVB (R9)(R11*1), R10
  9789. CMPB (BP)(R11*1), R10
  9790. JNE repeat_extend_forward_end_encodeBlockAsm8BAvx
  9791. LEAL 1(R11), R11
  9792. DECL R8
  9793. JNZ matchlen_single_loopback_repeat_extend
  9794. repeat_extend_forward_end_encodeBlockAsm8BAvx:
  9795. ADDL R11, CX
  9796. MOVL CX, BP
  9797. SUBL SI, BP
  9798. MOVL 16(SP), SI
  9799. TESTL DI, DI
  9800. JZ repeat_as_copy_encodeBlockAsm8BAvx
  9801. emit_repeat_again_match_repeat_encodeBlockAsm8BAvx:
  9802. MOVL BP, DI
  9803. LEAL -4(BP), BP
  9804. CMPL DI, $0x08
  9805. JLE repeat_two_match_repeat_encodeBlockAsm8BAvx
  9806. CMPL DI, $0x0c
  9807. JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8BAvx
  9808. CMPL SI, $0x00000800
  9809. JLT repeat_two_offset_match_repeat_encodeBlockAsm8BAvx
  9810. cant_repeat_two_offset_match_repeat_encodeBlockAsm8BAvx:
  9811. CMPL BP, $0x00000104
  9812. JLT repeat_three_match_repeat_encodeBlockAsm8BAvx
  9813. CMPL BP, $0x00010100
  9814. JLT repeat_four_match_repeat_encodeBlockAsm8BAvx
  9815. CMPL BP, $0x0100ffff
  9816. JLT repeat_five_match_repeat_encodeBlockAsm8BAvx
  9817. LEAL -16842747(BP), BP
  9818. MOVW $0x001d, (AX)
  9819. MOVW $0xfffb, 2(AX)
  9820. MOVB $0xff, 4(AX)
  9821. ADDQ $0x05, AX
  9822. JMP emit_repeat_again_match_repeat_encodeBlockAsm8BAvx
  9823. repeat_five_match_repeat_encodeBlockAsm8BAvx:
  9824. LEAL -65536(BP), BP
  9825. MOVL BP, SI
  9826. MOVW $0x001d, (AX)
  9827. MOVW BP, 2(AX)
  9828. SARL $0x10, SI
  9829. MOVB SI, 4(AX)
  9830. ADDQ $0x05, AX
  9831. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9832. repeat_four_match_repeat_encodeBlockAsm8BAvx:
  9833. LEAL -256(BP), BP
  9834. MOVW $0x0019, (AX)
  9835. MOVW BP, 2(AX)
  9836. ADDQ $0x04, AX
  9837. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9838. repeat_three_match_repeat_encodeBlockAsm8BAvx:
  9839. LEAL -4(BP), BP
  9840. MOVW $0x0015, (AX)
  9841. MOVB BP, 2(AX)
  9842. ADDQ $0x03, AX
  9843. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9844. repeat_two_match_repeat_encodeBlockAsm8BAvx:
  9845. SHLL $0x02, BP
  9846. ORL $0x01, BP
  9847. MOVW BP, (AX)
  9848. ADDQ $0x02, AX
  9849. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9850. repeat_two_offset_match_repeat_encodeBlockAsm8BAvx:
  9851. XORQ DI, DI
  9852. LEAL 1(DI)(BP*4), BP
  9853. MOVB SI, 1(AX)
  9854. SARL $0x08, SI
  9855. SHLL $0x05, SI
  9856. ORL SI, BP
  9857. MOVB BP, (AX)
  9858. ADDQ $0x02, AX
  9859. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9860. repeat_as_copy_encodeBlockAsm8BAvx:
  9861. CMPL SI, $0x00010000
  9862. JL two_byte_offset_repeat_as_copy_encodeBlockAsm8BAvx
  9863. four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8BAvx:
  9864. CMPL BP, $0x40
  9865. JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm8BAvx
  9866. MOVB $0xff, (AX)
  9867. MOVL SI, 1(AX)
  9868. LEAL -64(BP), BP
  9869. ADDQ $0x05, AX
  9870. CMPL BP, $0x04
  9871. JL four_bytes_remain_repeat_as_copy_encodeBlockAsm8BAvx
  9872. emit_repeat_again_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy:
  9873. MOVL BP, DI
  9874. LEAL -4(BP), BP
  9875. CMPL DI, $0x08
  9876. JLE repeat_two_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy
  9877. CMPL DI, $0x0c
  9878. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy
  9879. CMPL SI, $0x00000800
  9880. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy
  9881. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy:
  9882. CMPL BP, $0x00000104
  9883. JLT repeat_three_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy
  9884. CMPL BP, $0x00010100
  9885. JLT repeat_four_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy
  9886. CMPL BP, $0x0100ffff
  9887. JLT repeat_five_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy
  9888. LEAL -16842747(BP), BP
  9889. MOVW $0x001d, (AX)
  9890. MOVW $0xfffb, 2(AX)
  9891. MOVB $0xff, 4(AX)
  9892. ADDQ $0x05, AX
  9893. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy
  9894. repeat_five_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy:
  9895. LEAL -65536(BP), BP
  9896. MOVL BP, SI
  9897. MOVW $0x001d, (AX)
  9898. MOVW BP, 2(AX)
  9899. SARL $0x10, SI
  9900. MOVB SI, 4(AX)
  9901. ADDQ $0x05, AX
  9902. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9903. repeat_four_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy:
  9904. LEAL -256(BP), BP
  9905. MOVW $0x0019, (AX)
  9906. MOVW BP, 2(AX)
  9907. ADDQ $0x04, AX
  9908. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9909. repeat_three_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy:
  9910. LEAL -4(BP), BP
  9911. MOVW $0x0015, (AX)
  9912. MOVB BP, 2(AX)
  9913. ADDQ $0x03, AX
  9914. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9915. repeat_two_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy:
  9916. SHLL $0x02, BP
  9917. ORL $0x01, BP
  9918. MOVW BP, (AX)
  9919. ADDQ $0x02, AX
  9920. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9921. repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy:
  9922. XORQ DI, DI
  9923. LEAL 1(DI)(BP*4), BP
  9924. MOVB SI, 1(AX)
  9925. SARL $0x08, SI
  9926. SHLL $0x05, SI
  9927. ORL SI, BP
  9928. MOVB BP, (AX)
  9929. ADDQ $0x02, AX
  9930. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9931. JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8BAvx
  9932. four_bytes_remain_repeat_as_copy_encodeBlockAsm8BAvx:
  9933. TESTL BP, BP
  9934. JZ repeat_end_emit_encodeBlockAsm8BAvx
  9935. MOVB $0x03, BL
  9936. LEAL -4(BX)(BP*4), BP
  9937. MOVB BP, (AX)
  9938. MOVL SI, 1(AX)
  9939. ADDQ $0x05, AX
  9940. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9941. two_byte_offset_repeat_as_copy_encodeBlockAsm8BAvx:
  9942. CMPL BP, $0x40
  9943. JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8BAvx
  9944. MOVB $0xee, (AX)
  9945. MOVW SI, 1(AX)
  9946. LEAL -60(BP), BP
  9947. ADDQ $0x03, AX
  9948. emit_repeat_again_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short:
  9949. MOVL BP, DI
  9950. LEAL -4(BP), BP
  9951. CMPL DI, $0x08
  9952. JLE repeat_two_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short
  9953. CMPL DI, $0x0c
  9954. JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short
  9955. CMPL SI, $0x00000800
  9956. JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short
  9957. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short:
  9958. CMPL BP, $0x00000104
  9959. JLT repeat_three_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short
  9960. CMPL BP, $0x00010100
  9961. JLT repeat_four_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short
  9962. CMPL BP, $0x0100ffff
  9963. JLT repeat_five_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short
  9964. LEAL -16842747(BP), BP
  9965. MOVW $0x001d, (AX)
  9966. MOVW $0xfffb, 2(AX)
  9967. MOVB $0xff, 4(AX)
  9968. ADDQ $0x05, AX
  9969. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short
  9970. repeat_five_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short:
  9971. LEAL -65536(BP), BP
  9972. MOVL BP, SI
  9973. MOVW $0x001d, (AX)
  9974. MOVW BP, 2(AX)
  9975. SARL $0x10, SI
  9976. MOVB SI, 4(AX)
  9977. ADDQ $0x05, AX
  9978. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9979. repeat_four_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short:
  9980. LEAL -256(BP), BP
  9981. MOVW $0x0019, (AX)
  9982. MOVW BP, 2(AX)
  9983. ADDQ $0x04, AX
  9984. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9985. repeat_three_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short:
  9986. LEAL -4(BP), BP
  9987. MOVW $0x0015, (AX)
  9988. MOVB BP, 2(AX)
  9989. ADDQ $0x03, AX
  9990. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9991. repeat_two_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short:
  9992. SHLL $0x02, BP
  9993. ORL $0x01, BP
  9994. MOVW BP, (AX)
  9995. ADDQ $0x02, AX
  9996. JMP repeat_end_emit_encodeBlockAsm8BAvx
  9997. repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short:
  9998. XORQ DI, DI
  9999. LEAL 1(DI)(BP*4), BP
  10000. MOVB SI, 1(AX)
  10001. SARL $0x08, SI
  10002. SHLL $0x05, SI
  10003. ORL SI, BP
  10004. MOVB BP, (AX)
  10005. ADDQ $0x02, AX
  10006. JMP repeat_end_emit_encodeBlockAsm8BAvx
  10007. JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8BAvx
  10008. two_byte_offset_short_repeat_as_copy_encodeBlockAsm8BAvx:
  10009. CMPL BP, $0x0c
  10010. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8BAvx
  10011. CMPL SI, $0x00000800
  10012. JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8BAvx
  10013. MOVB $0x01, BL
  10014. LEAL -16(BX)(BP*4), BP
  10015. MOVB SI, 1(AX)
  10016. SHRL $0x08, SI
  10017. SHLL $0x05, SI
  10018. ORL SI, BP
  10019. MOVB BP, (AX)
  10020. ADDQ $0x02, AX
  10021. JMP repeat_end_emit_encodeBlockAsm8BAvx
  10022. emit_copy_three_repeat_as_copy_encodeBlockAsm8BAvx:
  10023. MOVB $0x02, BL
  10024. LEAL -4(BX)(BP*4), BP
  10025. MOVB BP, (AX)
  10026. MOVW SI, 1(AX)
  10027. ADDQ $0x03, AX
  10028. repeat_end_emit_encodeBlockAsm8BAvx:
  10029. MOVL CX, 12(SP)
  10030. CMPL CX, 8(SP)
  10031. JGE emit_remainder_encodeBlockAsm8BAvx
  10032. JMP search_loop_encodeBlockAsm8BAvx
  10033. no_repeat_found_encodeBlockAsm8BAvx:
  10034. CMPL (DX)(BP*1), SI
  10035. JEQ candidate_match_encodeBlockAsm8BAvx
  10036. SHRQ $0x08, SI
  10037. MOVL 24(SP)(R9*4), BP
  10038. LEAL 2(CX), R8
  10039. CMPL (DX)(DI*1), SI
  10040. JEQ candidate2_match_encodeBlockAsm8BAvx
  10041. MOVL R8, 24(SP)(R9*4)
  10042. SHRQ $0x08, SI
  10043. CMPL (DX)(BP*1), SI
  10044. JEQ candidate3_match_encodeBlockAsm8BAvx
  10045. MOVL 20(SP), CX
  10046. JMP search_loop_encodeBlockAsm8BAvx
  10047. candidate3_match_encodeBlockAsm8BAvx:
  10048. ADDL $0x02, CX
  10049. JMP candidate_match_encodeBlockAsm8BAvx
  10050. candidate2_match_encodeBlockAsm8BAvx:
  10051. MOVL R8, 24(SP)(R9*4)
  10052. INCL CX
  10053. MOVL DI, BP
  10054. candidate_match_encodeBlockAsm8BAvx:
  10055. MOVL 12(SP), SI
  10056. TESTL BP, BP
  10057. JZ match_extend_back_end_encodeBlockAsm8BAvx
  10058. match_extend_back_loop_encodeBlockAsm8BAvx:
  10059. CMPL CX, SI
  10060. JLE match_extend_back_end_encodeBlockAsm8BAvx
  10061. MOVB -1(DX)(BP*1), BL
  10062. MOVB -1(DX)(CX*1), DI
  10063. CMPB BL, DI
  10064. JNE match_extend_back_end_encodeBlockAsm8BAvx
  10065. LEAL -1(CX), CX
  10066. DECL BP
  10067. JZ match_extend_back_end_encodeBlockAsm8BAvx
  10068. JMP match_extend_back_loop_encodeBlockAsm8BAvx
  10069. match_extend_back_end_encodeBlockAsm8BAvx:
  10070. MOVL CX, SI
  10071. SUBL 12(SP), SI
  10072. LEAQ 4(AX)(SI*1), SI
  10073. CMPQ SI, (SP)
  10074. JL match_dst_size_check_encodeBlockAsm8BAvx
  10075. MOVQ $0x00000000, ret+48(FP)
  10076. RET
  10077. match_dst_size_check_encodeBlockAsm8BAvx:
  10078. MOVL CX, SI
  10079. MOVL 12(SP), DI
  10080. CMPL DI, SI
  10081. JEQ emit_literal_done_match_emit_encodeBlockAsm8BAvx
  10082. MOVL SI, R8
  10083. MOVL SI, 12(SP)
  10084. LEAQ (DX)(DI*1), SI
  10085. SUBL DI, R8
  10086. MOVL R8, DI
  10087. SUBL $0x01, DI
  10088. JC emit_literal_done_match_emit_encodeBlockAsm8BAvx
  10089. CMPL DI, $0x3c
  10090. JLT one_byte_match_emit_encodeBlockAsm8BAvx
  10091. CMPL DI, $0x00000100
  10092. JLT two_bytes_match_emit_encodeBlockAsm8BAvx
  10093. CMPL DI, $0x00010000
  10094. JLT three_bytes_match_emit_encodeBlockAsm8BAvx
  10095. CMPL DI, $0x01000000
  10096. JLT four_bytes_match_emit_encodeBlockAsm8BAvx
  10097. MOVB $0xfc, (AX)
  10098. MOVL DI, 1(AX)
  10099. ADDQ $0x05, AX
  10100. JMP memmove_match_emit_encodeBlockAsm8BAvx
  10101. four_bytes_match_emit_encodeBlockAsm8BAvx:
  10102. MOVL DI, R9
  10103. SHRL $0x10, R9
  10104. MOVB $0xf8, (AX)
  10105. MOVW DI, 1(AX)
  10106. MOVB R9, 3(AX)
  10107. ADDQ $0x04, AX
  10108. JMP memmove_match_emit_encodeBlockAsm8BAvx
  10109. three_bytes_match_emit_encodeBlockAsm8BAvx:
  10110. MOVB $0xf4, (AX)
  10111. MOVW DI, 1(AX)
  10112. ADDQ $0x03, AX
  10113. JMP memmove_match_emit_encodeBlockAsm8BAvx
  10114. two_bytes_match_emit_encodeBlockAsm8BAvx:
  10115. MOVB $0xf0, (AX)
  10116. MOVB DI, 1(AX)
  10117. ADDQ $0x02, AX
  10118. JMP memmove_match_emit_encodeBlockAsm8BAvx
  10119. one_byte_match_emit_encodeBlockAsm8BAvx:
  10120. SHLB $0x02, DI
  10121. MOVB DI, (AX)
  10122. ADDQ $0x01, AX
  10123. memmove_match_emit_encodeBlockAsm8BAvx:
  10124. LEAQ (AX)(R8*1), DI
  10125. NOP
  10126. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_tail:
  10127. TESTQ R8, R8
  10128. JEQ memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10129. CMPQ R8, $0x02
  10130. JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_1or2
  10131. CMPQ R8, $0x04
  10132. JB emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_3
  10133. JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_4
  10134. CMPQ R8, $0x08
  10135. JB emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_5through7
  10136. JE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_8
  10137. CMPQ R8, $0x10
  10138. JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_9through16
  10139. CMPQ R8, $0x20
  10140. JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_17through32
  10141. CMPQ R8, $0x40
  10142. JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_33through64
  10143. CMPQ R8, $0x80
  10144. JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_65through128
  10145. CMPQ R8, $0x00000100
  10146. JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_129through256
  10147. JMP emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_avxUnaligned
  10148. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_1or2:
  10149. MOVB (SI), R9
  10150. MOVB -1(SI)(R8*1), R10
  10151. MOVB R9, (AX)
  10152. MOVB R10, -1(AX)(R8*1)
  10153. JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10154. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_4:
  10155. MOVL (SI), R9
  10156. MOVL R9, (AX)
  10157. JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10158. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_3:
  10159. MOVW (SI), R9
  10160. MOVB 2(SI), R10
  10161. MOVW R9, (AX)
  10162. MOVB R10, 2(AX)
  10163. JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10164. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_5through7:
  10165. MOVL (SI), R9
  10166. MOVL -4(SI)(R8*1), R10
  10167. MOVL R9, (AX)
  10168. MOVL R10, -4(AX)(R8*1)
  10169. JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10170. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_8:
  10171. MOVQ (SI), R9
  10172. MOVQ R9, (AX)
  10173. JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10174. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_9through16:
  10175. MOVQ (SI), R9
  10176. MOVQ -8(SI)(R8*1), R10
  10177. MOVQ R9, (AX)
  10178. MOVQ R10, -8(AX)(R8*1)
  10179. JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10180. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_17through32:
  10181. MOVOU (SI), X0
  10182. MOVOU -16(SI)(R8*1), X1
  10183. MOVOU X0, (AX)
  10184. MOVOU X1, -16(AX)(R8*1)
  10185. JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10186. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_33through64:
  10187. MOVOU (SI), X0
  10188. MOVOU 16(SI), X1
  10189. MOVOU -32(SI)(R8*1), X2
  10190. MOVOU -16(SI)(R8*1), X3
  10191. MOVOU X0, (AX)
  10192. MOVOU X1, 16(AX)
  10193. MOVOU X2, -32(AX)(R8*1)
  10194. MOVOU X3, -16(AX)(R8*1)
  10195. JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10196. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_65through128:
  10197. MOVOU (SI), X0
  10198. MOVOU 16(SI), X1
  10199. MOVOU 32(SI), X2
  10200. MOVOU 48(SI), X3
  10201. MOVOU -64(SI)(R8*1), X12
  10202. MOVOU -48(SI)(R8*1), X13
  10203. MOVOU -32(SI)(R8*1), X14
  10204. MOVOU -16(SI)(R8*1), X15
  10205. MOVOU X0, (AX)
  10206. MOVOU X1, 16(AX)
  10207. MOVOU X2, 32(AX)
  10208. MOVOU X3, 48(AX)
  10209. MOVOU X12, -64(AX)(R8*1)
  10210. MOVOU X13, -48(AX)(R8*1)
  10211. MOVOU X14, -32(AX)(R8*1)
  10212. MOVOU X15, -16(AX)(R8*1)
  10213. JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10214. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_129through256:
  10215. MOVOU (SI), X0
  10216. MOVOU 16(SI), X1
  10217. MOVOU 32(SI), X2
  10218. MOVOU 48(SI), X3
  10219. MOVOU 64(SI), X4
  10220. MOVOU 80(SI), X5
  10221. MOVOU 96(SI), X6
  10222. MOVOU 112(SI), X7
  10223. MOVOU -128(SI)(R8*1), X8
  10224. MOVOU -112(SI)(R8*1), X9
  10225. MOVOU -96(SI)(R8*1), X10
  10226. MOVOU -80(SI)(R8*1), X11
  10227. MOVOU -64(SI)(R8*1), X12
  10228. MOVOU -48(SI)(R8*1), X13
  10229. MOVOU -32(SI)(R8*1), X14
  10230. MOVOU -16(SI)(R8*1), X15
  10231. MOVOU X0, (AX)
  10232. MOVOU X1, 16(AX)
  10233. MOVOU X2, 32(AX)
  10234. MOVOU X3, 48(AX)
  10235. MOVOU X4, 64(AX)
  10236. MOVOU X5, 80(AX)
  10237. MOVOU X6, 96(AX)
  10238. MOVOU X7, 112(AX)
  10239. MOVOU X8, -128(AX)(R8*1)
  10240. MOVOU X9, -112(AX)(R8*1)
  10241. MOVOU X10, -96(AX)(R8*1)
  10242. MOVOU X11, -80(AX)(R8*1)
  10243. MOVOU X12, -64(AX)(R8*1)
  10244. MOVOU X13, -48(AX)(R8*1)
  10245. MOVOU X14, -32(AX)(R8*1)
  10246. MOVOU X15, -16(AX)(R8*1)
  10247. JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx
  10248. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_256through2048:
  10249. LEAQ -256(R8), R8
  10250. MOVOU (SI), X0
  10251. MOVOU 16(SI), X1
  10252. MOVOU 32(SI), X2
  10253. MOVOU 48(SI), X3
  10254. MOVOU 64(SI), X4
  10255. MOVOU 80(SI), X5
  10256. MOVOU 96(SI), X6
  10257. MOVOU 112(SI), X7
  10258. MOVOU 128(SI), X8
  10259. MOVOU 144(SI), X9
  10260. MOVOU 160(SI), X10
  10261. MOVOU 176(SI), X11
  10262. MOVOU 192(SI), X12
  10263. MOVOU 208(SI), X13
  10264. MOVOU 224(SI), X14
  10265. MOVOU 240(SI), X15
  10266. MOVOU X0, (AX)
  10267. MOVOU X1, 16(AX)
  10268. MOVOU X2, 32(AX)
  10269. MOVOU X3, 48(AX)
  10270. MOVOU X4, 64(AX)
  10271. MOVOU X5, 80(AX)
  10272. MOVOU X6, 96(AX)
  10273. MOVOU X7, 112(AX)
  10274. MOVOU X8, 128(AX)
  10275. MOVOU X9, 144(AX)
  10276. MOVOU X10, 160(AX)
  10277. MOVOU X11, 176(AX)
  10278. MOVOU X12, 192(AX)
  10279. MOVOU X13, 208(AX)
  10280. MOVOU X14, 224(AX)
  10281. MOVOU X15, 240(AX)
  10282. CMPQ R8, $0x00000100
  10283. LEAQ 256(SI), SI
  10284. LEAQ 256(AX), AX
  10285. JGE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_256through2048
  10286. JMP emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_tail
  10287. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_avxUnaligned:
  10288. LEAQ (SI)(R8*1), R10
  10289. MOVQ AX, R12
  10290. MOVOU -128(R10), X5
  10291. MOVOU -112(R10), X6
  10292. MOVQ $0x00000080, R9
  10293. ANDQ $0xffffffe0, AX
  10294. ADDQ $0x20, AX
  10295. MOVOU -96(R10), X7
  10296. MOVOU -80(R10), X8
  10297. MOVQ AX, R11
  10298. SUBQ R12, R11
  10299. MOVOU -64(R10), X9
  10300. MOVOU -48(R10), X10
  10301. SUBQ R11, R8
  10302. MOVOU -32(R10), X11
  10303. MOVOU -16(R10), X12
  10304. VMOVDQU (SI), Y4
  10305. ADDQ R11, SI
  10306. SUBQ R9, R8
  10307. emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_gobble_128_loop:
  10308. VMOVDQU (SI), Y0
  10309. VMOVDQU 32(SI), Y1
  10310. VMOVDQU 64(SI), Y2
  10311. VMOVDQU 96(SI), Y3
  10312. ADDQ R9, SI
  10313. VMOVDQA Y0, (AX)
  10314. VMOVDQA Y1, 32(AX)
  10315. VMOVDQA Y2, 64(AX)
  10316. VMOVDQA Y3, 96(AX)
  10317. ADDQ R9, AX
  10318. SUBQ R9, R8
  10319. JA emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_gobble_128_loop
  10320. ADDQ R9, R8
  10321. ADDQ AX, R8
  10322. VMOVDQU Y4, (R12)
  10323. VZEROUPPER
  10324. MOVOU X5, -128(R8)
  10325. MOVOU X6, -112(R8)
  10326. MOVOU X7, -96(R8)
  10327. MOVOU X8, -80(R8)
  10328. MOVOU X9, -64(R8)
  10329. MOVOU X10, -48(R8)
  10330. MOVOU X11, -32(R8)
  10331. MOVOU X12, -16(R8)
  10332. memmove_end_copy_match_emit_encodeBlockAsm8BAvx:
  10333. MOVQ DI, AX
  10334. emit_literal_done_match_emit_encodeBlockAsm8BAvx:
  10335. match_nolit_loop_encodeBlockAsm8BAvx:
  10336. MOVL CX, SI
  10337. SUBL BP, SI
  10338. MOVL SI, 16(SP)
  10339. ADDL $0x04, CX
  10340. ADDL $0x04, BP
  10341. MOVQ src_len+32(FP), SI
  10342. SUBL CX, SI
  10343. LEAQ (DX)(CX*1), DI
  10344. LEAQ (DX)(BP*1), BP
  10345. XORL R9, R9
  10346. CMPL SI, $0x08
  10347. JL matchlen_single_match_nolit_encodeBlockAsm8BAvx
  10348. matchlen_loopback_match_nolit_encodeBlockAsm8BAvx:
  10349. MOVQ (DI)(R9*1), R8
  10350. XORQ (BP)(R9*1), R8
  10351. TESTQ R8, R8
  10352. JZ matchlen_loop_match_nolit_encodeBlockAsm8BAvx
  10353. BSFQ R8, R8
  10354. SARQ $0x03, R8
  10355. LEAL (R9)(R8*1), R9
  10356. JMP match_nolit_end_encodeBlockAsm8BAvx
  10357. matchlen_loop_match_nolit_encodeBlockAsm8BAvx:
  10358. LEAL -8(SI), SI
  10359. LEAL 8(R9), R9
  10360. CMPL SI, $0x08
  10361. JGE matchlen_loopback_match_nolit_encodeBlockAsm8BAvx
  10362. matchlen_single_match_nolit_encodeBlockAsm8BAvx:
  10363. TESTL SI, SI
  10364. JZ match_nolit_end_encodeBlockAsm8BAvx
  10365. matchlen_single_loopback_match_nolit_encodeBlockAsm8BAvx:
  10366. MOVB (DI)(R9*1), R8
  10367. CMPB (BP)(R9*1), R8
  10368. JNE match_nolit_end_encodeBlockAsm8BAvx
  10369. LEAL 1(R9), R9
  10370. DECL SI
  10371. JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm8BAvx
  10372. match_nolit_end_encodeBlockAsm8BAvx:
  10373. ADDL R9, CX
  10374. MOVL 16(SP), BP
  10375. ADDL $0x04, R9
  10376. CMPL BP, $0x00010000
  10377. JL two_byte_offset_match_nolit_encodeBlockAsm8BAvx
  10378. four_bytes_loop_back_match_nolit_encodeBlockAsm8BAvx:
  10379. CMPL R9, $0x40
  10380. JLE four_bytes_remain_match_nolit_encodeBlockAsm8BAvx
  10381. MOVB $0xff, (AX)
  10382. MOVL BP, 1(AX)
  10383. LEAL -64(R9), R9
  10384. ADDQ $0x05, AX
  10385. CMPL R9, $0x04
  10386. JL four_bytes_remain_match_nolit_encodeBlockAsm8BAvx
  10387. emit_repeat_again_match_nolit_encodeBlockAsm8BAvx_emit_copy:
  10388. MOVL R9, SI
  10389. LEAL -4(R9), R9
  10390. CMPL SI, $0x08
  10391. JLE repeat_two_match_nolit_encodeBlockAsm8BAvx_emit_copy
  10392. CMPL SI, $0x0c
  10393. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy
  10394. CMPL BP, $0x00000800
  10395. JLT repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy
  10396. cant_repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy:
  10397. CMPL R9, $0x00000104
  10398. JLT repeat_three_match_nolit_encodeBlockAsm8BAvx_emit_copy
  10399. CMPL R9, $0x00010100
  10400. JLT repeat_four_match_nolit_encodeBlockAsm8BAvx_emit_copy
  10401. CMPL R9, $0x0100ffff
  10402. JLT repeat_five_match_nolit_encodeBlockAsm8BAvx_emit_copy
  10403. LEAL -16842747(R9), R9
  10404. MOVW $0x001d, (AX)
  10405. MOVW $0xfffb, 2(AX)
  10406. MOVB $0xff, 4(AX)
  10407. ADDQ $0x05, AX
  10408. JMP emit_repeat_again_match_nolit_encodeBlockAsm8BAvx_emit_copy
  10409. repeat_five_match_nolit_encodeBlockAsm8BAvx_emit_copy:
  10410. LEAL -65536(R9), R9
  10411. MOVL R9, BP
  10412. MOVW $0x001d, (AX)
  10413. MOVW R9, 2(AX)
  10414. SARL $0x10, BP
  10415. MOVB BP, 4(AX)
  10416. ADDQ $0x05, AX
  10417. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10418. repeat_four_match_nolit_encodeBlockAsm8BAvx_emit_copy:
  10419. LEAL -256(R9), R9
  10420. MOVW $0x0019, (AX)
  10421. MOVW R9, 2(AX)
  10422. ADDQ $0x04, AX
  10423. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10424. repeat_three_match_nolit_encodeBlockAsm8BAvx_emit_copy:
  10425. LEAL -4(R9), R9
  10426. MOVW $0x0015, (AX)
  10427. MOVB R9, 2(AX)
  10428. ADDQ $0x03, AX
  10429. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10430. repeat_two_match_nolit_encodeBlockAsm8BAvx_emit_copy:
  10431. SHLL $0x02, R9
  10432. ORL $0x01, R9
  10433. MOVW R9, (AX)
  10434. ADDQ $0x02, AX
  10435. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10436. repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy:
  10437. XORQ SI, SI
  10438. LEAL 1(SI)(R9*4), R9
  10439. MOVB BP, 1(AX)
  10440. SARL $0x08, BP
  10441. SHLL $0x05, BP
  10442. ORL BP, R9
  10443. MOVB R9, (AX)
  10444. ADDQ $0x02, AX
  10445. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10446. JMP four_bytes_loop_back_match_nolit_encodeBlockAsm8BAvx
  10447. four_bytes_remain_match_nolit_encodeBlockAsm8BAvx:
  10448. TESTL R9, R9
  10449. JZ match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10450. MOVB $0x03, BL
  10451. LEAL -4(BX)(R9*4), R9
  10452. MOVB R9, (AX)
  10453. MOVL BP, 1(AX)
  10454. ADDQ $0x05, AX
  10455. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10456. two_byte_offset_match_nolit_encodeBlockAsm8BAvx:
  10457. CMPL R9, $0x40
  10458. JLE two_byte_offset_short_match_nolit_encodeBlockAsm8BAvx
  10459. MOVB $0xee, (AX)
  10460. MOVW BP, 1(AX)
  10461. LEAL -60(R9), R9
  10462. ADDQ $0x03, AX
  10463. emit_repeat_again_match_nolit_encodeBlockAsm8BAvx_emit_copy_short:
  10464. MOVL R9, SI
  10465. LEAL -4(R9), R9
  10466. CMPL SI, $0x08
  10467. JLE repeat_two_match_nolit_encodeBlockAsm8BAvx_emit_copy_short
  10468. CMPL SI, $0x0c
  10469. JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy_short
  10470. CMPL BP, $0x00000800
  10471. JLT repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy_short
  10472. cant_repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy_short:
  10473. CMPL R9, $0x00000104
  10474. JLT repeat_three_match_nolit_encodeBlockAsm8BAvx_emit_copy_short
  10475. CMPL R9, $0x00010100
  10476. JLT repeat_four_match_nolit_encodeBlockAsm8BAvx_emit_copy_short
  10477. CMPL R9, $0x0100ffff
  10478. JLT repeat_five_match_nolit_encodeBlockAsm8BAvx_emit_copy_short
  10479. LEAL -16842747(R9), R9
  10480. MOVW $0x001d, (AX)
  10481. MOVW $0xfffb, 2(AX)
  10482. MOVB $0xff, 4(AX)
  10483. ADDQ $0x05, AX
  10484. JMP emit_repeat_again_match_nolit_encodeBlockAsm8BAvx_emit_copy_short
  10485. repeat_five_match_nolit_encodeBlockAsm8BAvx_emit_copy_short:
  10486. LEAL -65536(R9), R9
  10487. MOVL R9, BP
  10488. MOVW $0x001d, (AX)
  10489. MOVW R9, 2(AX)
  10490. SARL $0x10, BP
  10491. MOVB BP, 4(AX)
  10492. ADDQ $0x05, AX
  10493. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10494. repeat_four_match_nolit_encodeBlockAsm8BAvx_emit_copy_short:
  10495. LEAL -256(R9), R9
  10496. MOVW $0x0019, (AX)
  10497. MOVW R9, 2(AX)
  10498. ADDQ $0x04, AX
  10499. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10500. repeat_three_match_nolit_encodeBlockAsm8BAvx_emit_copy_short:
  10501. LEAL -4(R9), R9
  10502. MOVW $0x0015, (AX)
  10503. MOVB R9, 2(AX)
  10504. ADDQ $0x03, AX
  10505. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10506. repeat_two_match_nolit_encodeBlockAsm8BAvx_emit_copy_short:
  10507. SHLL $0x02, R9
  10508. ORL $0x01, R9
  10509. MOVW R9, (AX)
  10510. ADDQ $0x02, AX
  10511. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10512. repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy_short:
  10513. XORQ SI, SI
  10514. LEAL 1(SI)(R9*4), R9
  10515. MOVB BP, 1(AX)
  10516. SARL $0x08, BP
  10517. SHLL $0x05, BP
  10518. ORL BP, R9
  10519. MOVB R9, (AX)
  10520. ADDQ $0x02, AX
  10521. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10522. JMP two_byte_offset_match_nolit_encodeBlockAsm8BAvx
  10523. two_byte_offset_short_match_nolit_encodeBlockAsm8BAvx:
  10524. CMPL R9, $0x0c
  10525. JGE emit_copy_three_match_nolit_encodeBlockAsm8BAvx
  10526. CMPL BP, $0x00000800
  10527. JGE emit_copy_three_match_nolit_encodeBlockAsm8BAvx
  10528. MOVB $0x01, BL
  10529. LEAL -16(BX)(R9*4), R9
  10530. MOVB BP, 1(AX)
  10531. SHRL $0x08, BP
  10532. SHLL $0x05, BP
  10533. ORL BP, R9
  10534. MOVB R9, (AX)
  10535. ADDQ $0x02, AX
  10536. JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx
  10537. emit_copy_three_match_nolit_encodeBlockAsm8BAvx:
  10538. MOVB $0x02, BL
  10539. LEAL -4(BX)(R9*4), R9
  10540. MOVB R9, (AX)
  10541. MOVW BP, 1(AX)
  10542. ADDQ $0x03, AX
  10543. match_nolit_emitcopy_end_encodeBlockAsm8BAvx:
  10544. MOVL CX, 12(SP)
  10545. CMPL CX, 8(SP)
  10546. JGE emit_remainder_encodeBlockAsm8BAvx
  10547. CMPQ AX, (SP)
  10548. JL match_nolit_dst_ok_encodeBlockAsm8BAvx
  10549. MOVQ $0x00000000, ret+48(FP)
  10550. RET
  10551. match_nolit_dst_ok_encodeBlockAsm8BAvx:
  10552. MOVQ -2(DX)(CX*1), SI
  10553. MOVQ $0x9e3779b1, BP
  10554. MOVQ SI, DI
  10555. SHRQ $0x10, SI
  10556. MOVQ SI, R8
  10557. SHLQ $0x20, DI
  10558. IMULQ BP, DI
  10559. SHRQ $0x38, DI
  10560. SHLQ $0x20, R8
  10561. IMULQ BP, R8
  10562. SHRQ $0x38, R8
  10563. LEAL -2(CX), R9
  10564. MOVL 24(SP)(R8*4), BP
  10565. MOVL R9, 24(SP)(DI*4)
  10566. MOVL CX, 24(SP)(R8*4)
  10567. CMPL (DX)(BP*1), SI
  10568. JEQ match_nolit_loop_encodeBlockAsm8BAvx
  10569. INCL CX
  10570. JMP search_loop_encodeBlockAsm8BAvx
  10571. emit_remainder_encodeBlockAsm8BAvx:
  10572. MOVQ src_len+32(FP), CX
  10573. SUBL 12(SP), CX
  10574. LEAQ 4(AX)(CX*1), CX
  10575. CMPQ CX, (SP)
  10576. JL emit_remainder_ok_encodeBlockAsm8BAvx
  10577. MOVQ $0x00000000, ret+48(FP)
  10578. RET
  10579. emit_remainder_ok_encodeBlockAsm8BAvx:
  10580. MOVQ src_len+32(FP), CX
  10581. MOVL 12(SP), BX
  10582. CMPL BX, CX
  10583. JEQ emit_literal_done_emit_remainder_encodeBlockAsm8BAvx
  10584. MOVL CX, BP
  10585. MOVL CX, 12(SP)
  10586. LEAQ (DX)(BX*1), CX
  10587. SUBL BX, BP
  10588. MOVL BP, DX
  10589. SUBL $0x01, DX
  10590. JC emit_literal_done_emit_remainder_encodeBlockAsm8BAvx
  10591. CMPL DX, $0x3c
  10592. JLT one_byte_emit_remainder_encodeBlockAsm8BAvx
  10593. CMPL DX, $0x00000100
  10594. JLT two_bytes_emit_remainder_encodeBlockAsm8BAvx
  10595. CMPL DX, $0x00010000
  10596. JLT three_bytes_emit_remainder_encodeBlockAsm8BAvx
  10597. CMPL DX, $0x01000000
  10598. JLT four_bytes_emit_remainder_encodeBlockAsm8BAvx
  10599. MOVB $0xfc, (AX)
  10600. MOVL DX, 1(AX)
  10601. ADDQ $0x05, AX
  10602. JMP memmove_emit_remainder_encodeBlockAsm8BAvx
  10603. four_bytes_emit_remainder_encodeBlockAsm8BAvx:
  10604. MOVL DX, BX
  10605. SHRL $0x10, BX
  10606. MOVB $0xf8, (AX)
  10607. MOVW DX, 1(AX)
  10608. MOVB BL, 3(AX)
  10609. ADDQ $0x04, AX
  10610. JMP memmove_emit_remainder_encodeBlockAsm8BAvx
  10611. three_bytes_emit_remainder_encodeBlockAsm8BAvx:
  10612. MOVB $0xf4, (AX)
  10613. MOVW DX, 1(AX)
  10614. ADDQ $0x03, AX
  10615. JMP memmove_emit_remainder_encodeBlockAsm8BAvx
  10616. two_bytes_emit_remainder_encodeBlockAsm8BAvx:
  10617. MOVB $0xf0, (AX)
  10618. MOVB DL, 1(AX)
  10619. ADDQ $0x02, AX
  10620. JMP memmove_emit_remainder_encodeBlockAsm8BAvx
  10621. one_byte_emit_remainder_encodeBlockAsm8BAvx:
  10622. SHLB $0x02, DL
  10623. MOVB DL, (AX)
  10624. ADDQ $0x01, AX
  10625. memmove_emit_remainder_encodeBlockAsm8BAvx:
  10626. LEAQ (AX)(BP*1), DX
  10627. MOVL BP, BX
  10628. NOP
  10629. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_tail:
  10630. TESTQ BX, BX
  10631. JEQ memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10632. CMPQ BX, $0x02
  10633. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_1or2
  10634. CMPQ BX, $0x04
  10635. JB emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_3
  10636. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_4
  10637. CMPQ BX, $0x08
  10638. JB emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_5through7
  10639. JE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_8
  10640. CMPQ BX, $0x10
  10641. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_9through16
  10642. CMPQ BX, $0x20
  10643. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_17through32
  10644. CMPQ BX, $0x40
  10645. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_33through64
  10646. CMPQ BX, $0x80
  10647. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_65through128
  10648. CMPQ BX, $0x00000100
  10649. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_129through256
  10650. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_avxUnaligned
  10651. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_1or2:
  10652. MOVB (CX), BP
  10653. MOVB -1(CX)(BX*1), SI
  10654. MOVB BP, (AX)
  10655. MOVB SI, -1(AX)(BX*1)
  10656. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10657. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_4:
  10658. MOVL (CX), BP
  10659. MOVL BP, (AX)
  10660. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10661. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_3:
  10662. MOVW (CX), BP
  10663. MOVB 2(CX), SI
  10664. MOVW BP, (AX)
  10665. MOVB SI, 2(AX)
  10666. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10667. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_5through7:
  10668. MOVL (CX), BP
  10669. MOVL -4(CX)(BX*1), SI
  10670. MOVL BP, (AX)
  10671. MOVL SI, -4(AX)(BX*1)
  10672. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10673. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_8:
  10674. MOVQ (CX), BP
  10675. MOVQ BP, (AX)
  10676. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10677. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_9through16:
  10678. MOVQ (CX), BP
  10679. MOVQ -8(CX)(BX*1), SI
  10680. MOVQ BP, (AX)
  10681. MOVQ SI, -8(AX)(BX*1)
  10682. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10683. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_17through32:
  10684. MOVOU (CX), X0
  10685. MOVOU -16(CX)(BX*1), X1
  10686. MOVOU X0, (AX)
  10687. MOVOU X1, -16(AX)(BX*1)
  10688. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10689. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_33through64:
  10690. MOVOU (CX), X0
  10691. MOVOU 16(CX), X1
  10692. MOVOU -32(CX)(BX*1), X2
  10693. MOVOU -16(CX)(BX*1), X3
  10694. MOVOU X0, (AX)
  10695. MOVOU X1, 16(AX)
  10696. MOVOU X2, -32(AX)(BX*1)
  10697. MOVOU X3, -16(AX)(BX*1)
  10698. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10699. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_65through128:
  10700. MOVOU (CX), X0
  10701. MOVOU 16(CX), X1
  10702. MOVOU 32(CX), X2
  10703. MOVOU 48(CX), X3
  10704. MOVOU -64(CX)(BX*1), X12
  10705. MOVOU -48(CX)(BX*1), X13
  10706. MOVOU -32(CX)(BX*1), X14
  10707. MOVOU -16(CX)(BX*1), X15
  10708. MOVOU X0, (AX)
  10709. MOVOU X1, 16(AX)
  10710. MOVOU X2, 32(AX)
  10711. MOVOU X3, 48(AX)
  10712. MOVOU X12, -64(AX)(BX*1)
  10713. MOVOU X13, -48(AX)(BX*1)
  10714. MOVOU X14, -32(AX)(BX*1)
  10715. MOVOU X15, -16(AX)(BX*1)
  10716. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10717. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_129through256:
  10718. MOVOU (CX), X0
  10719. MOVOU 16(CX), X1
  10720. MOVOU 32(CX), X2
  10721. MOVOU 48(CX), X3
  10722. MOVOU 64(CX), X4
  10723. MOVOU 80(CX), X5
  10724. MOVOU 96(CX), X6
  10725. MOVOU 112(CX), X7
  10726. MOVOU -128(CX)(BX*1), X8
  10727. MOVOU -112(CX)(BX*1), X9
  10728. MOVOU -96(CX)(BX*1), X10
  10729. MOVOU -80(CX)(BX*1), X11
  10730. MOVOU -64(CX)(BX*1), X12
  10731. MOVOU -48(CX)(BX*1), X13
  10732. MOVOU -32(CX)(BX*1), X14
  10733. MOVOU -16(CX)(BX*1), X15
  10734. MOVOU X0, (AX)
  10735. MOVOU X1, 16(AX)
  10736. MOVOU X2, 32(AX)
  10737. MOVOU X3, 48(AX)
  10738. MOVOU X4, 64(AX)
  10739. MOVOU X5, 80(AX)
  10740. MOVOU X6, 96(AX)
  10741. MOVOU X7, 112(AX)
  10742. MOVOU X8, -128(AX)(BX*1)
  10743. MOVOU X9, -112(AX)(BX*1)
  10744. MOVOU X10, -96(AX)(BX*1)
  10745. MOVOU X11, -80(AX)(BX*1)
  10746. MOVOU X12, -64(AX)(BX*1)
  10747. MOVOU X13, -48(AX)(BX*1)
  10748. MOVOU X14, -32(AX)(BX*1)
  10749. MOVOU X15, -16(AX)(BX*1)
  10750. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx
  10751. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_256through2048:
  10752. LEAQ -256(BX), BX
  10753. MOVOU (CX), X0
  10754. MOVOU 16(CX), X1
  10755. MOVOU 32(CX), X2
  10756. MOVOU 48(CX), X3
  10757. MOVOU 64(CX), X4
  10758. MOVOU 80(CX), X5
  10759. MOVOU 96(CX), X6
  10760. MOVOU 112(CX), X7
  10761. MOVOU 128(CX), X8
  10762. MOVOU 144(CX), X9
  10763. MOVOU 160(CX), X10
  10764. MOVOU 176(CX), X11
  10765. MOVOU 192(CX), X12
  10766. MOVOU 208(CX), X13
  10767. MOVOU 224(CX), X14
  10768. MOVOU 240(CX), X15
  10769. MOVOU X0, (AX)
  10770. MOVOU X1, 16(AX)
  10771. MOVOU X2, 32(AX)
  10772. MOVOU X3, 48(AX)
  10773. MOVOU X4, 64(AX)
  10774. MOVOU X5, 80(AX)
  10775. MOVOU X6, 96(AX)
  10776. MOVOU X7, 112(AX)
  10777. MOVOU X8, 128(AX)
  10778. MOVOU X9, 144(AX)
  10779. MOVOU X10, 160(AX)
  10780. MOVOU X11, 176(AX)
  10781. MOVOU X12, 192(AX)
  10782. MOVOU X13, 208(AX)
  10783. MOVOU X14, 224(AX)
  10784. MOVOU X15, 240(AX)
  10785. CMPQ BX, $0x00000100
  10786. LEAQ 256(CX), CX
  10787. LEAQ 256(AX), AX
  10788. JGE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_256through2048
  10789. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_tail
  10790. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_avxUnaligned:
  10791. LEAQ (CX)(BX*1), SI
  10792. MOVQ AX, R8
  10793. MOVOU -128(SI), X5
  10794. MOVOU -112(SI), X6
  10795. MOVQ $0x00000080, BP
  10796. ANDQ $0xffffffe0, AX
  10797. ADDQ $0x20, AX
  10798. MOVOU -96(SI), X7
  10799. MOVOU -80(SI), X8
  10800. MOVQ AX, DI
  10801. SUBQ R8, DI
  10802. MOVOU -64(SI), X9
  10803. MOVOU -48(SI), X10
  10804. SUBQ DI, BX
  10805. MOVOU -32(SI), X11
  10806. MOVOU -16(SI), X12
  10807. VMOVDQU (CX), Y4
  10808. ADDQ DI, CX
  10809. SUBQ BP, BX
  10810. emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_gobble_128_loop:
  10811. VMOVDQU (CX), Y0
  10812. VMOVDQU 32(CX), Y1
  10813. VMOVDQU 64(CX), Y2
  10814. VMOVDQU 96(CX), Y3
  10815. ADDQ BP, CX
  10816. VMOVDQA Y0, (AX)
  10817. VMOVDQA Y1, 32(AX)
  10818. VMOVDQA Y2, 64(AX)
  10819. VMOVDQA Y3, 96(AX)
  10820. ADDQ BP, AX
  10821. SUBQ BP, BX
  10822. JA emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_gobble_128_loop
  10823. ADDQ BP, BX
  10824. ADDQ AX, BX
  10825. VMOVDQU Y4, (R8)
  10826. VZEROUPPER
  10827. MOVOU X5, -128(BX)
  10828. MOVOU X6, -112(BX)
  10829. MOVOU X7, -96(BX)
  10830. MOVOU X8, -80(BX)
  10831. MOVOU X9, -64(BX)
  10832. MOVOU X10, -48(BX)
  10833. MOVOU X11, -32(BX)
  10834. MOVOU X12, -16(BX)
  10835. memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx:
  10836. MOVQ DX, AX
  10837. emit_literal_done_emit_remainder_encodeBlockAsm8BAvx:
  10838. MOVQ dst_base+0(FP), CX
  10839. SUBQ CX, AX
  10840. MOVQ AX, ret+48(FP)
  10841. RET
  10842. // func encodeSnappyBlockAsm(dst []byte, src []byte) int
  10843. // Requires: SSE2
  10844. TEXT ·encodeSnappyBlockAsm(SB), $65560-56
  10845. MOVQ dst_base+0(FP), AX
  10846. MOVQ $0x00000200, CX
  10847. LEAQ 24(SP), DX
  10848. PXOR X0, X0
  10849. zero_loop_encodeSnappyBlockAsm:
  10850. MOVOU X0, (DX)
  10851. MOVOU X0, 16(DX)
  10852. MOVOU X0, 32(DX)
  10853. MOVOU X0, 48(DX)
  10854. MOVOU X0, 64(DX)
  10855. MOVOU X0, 80(DX)
  10856. MOVOU X0, 96(DX)
  10857. MOVOU X0, 112(DX)
  10858. ADDQ $0x80, DX
  10859. DECQ CX
  10860. JNZ zero_loop_encodeSnappyBlockAsm
  10861. MOVL $0x00000000, 12(SP)
  10862. MOVQ src_len+32(FP), CX
  10863. LEAQ -5(CX), DX
  10864. LEAQ -8(CX), BP
  10865. MOVL BP, 8(SP)
  10866. SHRQ $0x05, CX
  10867. SUBL CX, DX
  10868. LEAQ (AX)(DX*1), DX
  10869. MOVQ DX, (SP)
  10870. MOVL $0x00000001, CX
  10871. MOVL CX, 16(SP)
  10872. MOVQ src_base+24(FP), DX
  10873. search_loop_encodeSnappyBlockAsm:
  10874. MOVQ (DX)(CX*1), SI
  10875. MOVL CX, BP
  10876. SUBL 12(SP), BP
  10877. SHRL $0x06, BP
  10878. LEAL 4(CX)(BP*1), BP
  10879. MOVL 8(SP), DI
  10880. CMPL BP, DI
  10881. JGT emit_remainder_encodeSnappyBlockAsm
  10882. MOVL BP, 20(SP)
  10883. MOVQ $0x0000cf1bbcdcbf9b, R8
  10884. MOVQ SI, R9
  10885. MOVQ SI, R10
  10886. SHRQ $0x08, R10
  10887. SHLQ $0x10, R9
  10888. IMULQ R8, R9
  10889. SHRQ $0x32, R9
  10890. SHLQ $0x10, R10
  10891. IMULQ R8, R10
  10892. SHRQ $0x32, R10
  10893. MOVL 24(SP)(R9*4), BP
  10894. MOVL 24(SP)(R10*4), DI
  10895. MOVL CX, 24(SP)(R9*4)
  10896. LEAL 1(CX), R9
  10897. MOVL R9, 24(SP)(R10*4)
  10898. MOVQ SI, R9
  10899. SHRQ $0x10, R9
  10900. SHLQ $0x10, R9
  10901. IMULQ R8, R9
  10902. SHRQ $0x32, R9
  10903. MOVL CX, R8
  10904. SUBL 16(SP), R8
  10905. MOVL 1(DX)(R8*1), R10
  10906. MOVQ SI, R8
  10907. SHRQ $0x08, R8
  10908. CMPL R8, R10
  10909. JNE no_repeat_found_encodeSnappyBlockAsm
  10910. LEAL 1(CX), SI
  10911. MOVL 12(SP), BP
  10912. MOVL SI, DI
  10913. SUBL 16(SP), DI
  10914. JZ repeat_extend_back_end_encodeSnappyBlockAsm
  10915. repeat_extend_back_loop_encodeSnappyBlockAsm:
  10916. CMPL SI, BP
  10917. JLE repeat_extend_back_end_encodeSnappyBlockAsm
  10918. MOVB -1(DX)(DI*1), BL
  10919. MOVB -1(DX)(SI*1), R8
  10920. CMPB BL, R8
  10921. JNE repeat_extend_back_end_encodeSnappyBlockAsm
  10922. LEAL -1(SI), SI
  10923. DECL DI
  10924. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm
  10925. repeat_extend_back_end_encodeSnappyBlockAsm:
  10926. MOVL 12(SP), BP
  10927. CMPL BP, SI
  10928. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm
  10929. MOVL SI, DI
  10930. MOVL SI, 12(SP)
  10931. LEAQ (DX)(BP*1), R8
  10932. SUBL BP, DI
  10933. MOVL DI, BP
  10934. SUBL $0x01, BP
  10935. JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm
  10936. CMPL BP, $0x3c
  10937. JLT one_byte_repeat_emit_encodeSnappyBlockAsm
  10938. CMPL BP, $0x00000100
  10939. JLT two_bytes_repeat_emit_encodeSnappyBlockAsm
  10940. CMPL BP, $0x00010000
  10941. JLT three_bytes_repeat_emit_encodeSnappyBlockAsm
  10942. CMPL BP, $0x01000000
  10943. JLT four_bytes_repeat_emit_encodeSnappyBlockAsm
  10944. MOVB $0xfc, (AX)
  10945. MOVL BP, 1(AX)
  10946. ADDQ $0x05, AX
  10947. JMP memmove_repeat_emit_encodeSnappyBlockAsm
  10948. four_bytes_repeat_emit_encodeSnappyBlockAsm:
  10949. MOVL BP, R9
  10950. SHRL $0x10, R9
  10951. MOVB $0xf8, (AX)
  10952. MOVW BP, 1(AX)
  10953. MOVB R9, 3(AX)
  10954. ADDQ $0x04, AX
  10955. JMP memmove_repeat_emit_encodeSnappyBlockAsm
  10956. three_bytes_repeat_emit_encodeSnappyBlockAsm:
  10957. MOVB $0xf4, (AX)
  10958. MOVW BP, 1(AX)
  10959. ADDQ $0x03, AX
  10960. JMP memmove_repeat_emit_encodeSnappyBlockAsm
  10961. two_bytes_repeat_emit_encodeSnappyBlockAsm:
  10962. MOVB $0xf0, (AX)
  10963. MOVB BP, 1(AX)
  10964. ADDQ $0x02, AX
  10965. JMP memmove_repeat_emit_encodeSnappyBlockAsm
  10966. one_byte_repeat_emit_encodeSnappyBlockAsm:
  10967. SHLB $0x02, BP
  10968. MOVB BP, (AX)
  10969. ADDQ $0x01, AX
  10970. memmove_repeat_emit_encodeSnappyBlockAsm:
  10971. LEAQ (AX)(DI*1), BP
  10972. NOP
  10973. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_tail:
  10974. TESTQ DI, DI
  10975. JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  10976. CMPQ DI, $0x02
  10977. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2
  10978. CMPQ DI, $0x04
  10979. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3
  10980. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4
  10981. CMPQ DI, $0x08
  10982. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_5through7
  10983. JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
  10984. CMPQ DI, $0x10
  10985. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_9through16
  10986. CMPQ DI, $0x20
  10987. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
  10988. CMPQ DI, $0x40
  10989. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64
  10990. CMPQ DI, $0x80
  10991. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_65through128
  10992. CMPQ DI, $0x00000100
  10993. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_129through256
  10994. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_256through2048
  10995. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2:
  10996. MOVB (R8), R9
  10997. MOVB -1(R8)(DI*1), R8
  10998. MOVB R9, (AX)
  10999. MOVB R8, -1(AX)(DI*1)
  11000. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  11001. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4:
  11002. MOVL (R8), R9
  11003. MOVL R9, (AX)
  11004. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  11005. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3:
  11006. MOVW (R8), R9
  11007. MOVB 2(R8), R8
  11008. MOVW R9, (AX)
  11009. MOVB R8, 2(AX)
  11010. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  11011. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_5through7:
  11012. MOVL (R8), R9
  11013. MOVL -4(R8)(DI*1), R8
  11014. MOVL R9, (AX)
  11015. MOVL R8, -4(AX)(DI*1)
  11016. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  11017. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
  11018. MOVQ (R8), R9
  11019. MOVQ R9, (AX)
  11020. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  11021. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_9through16:
  11022. MOVQ (R8), R9
  11023. MOVQ -8(R8)(DI*1), R8
  11024. MOVQ R9, (AX)
  11025. MOVQ R8, -8(AX)(DI*1)
  11026. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  11027. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
  11028. MOVOU (R8), X0
  11029. MOVOU -16(R8)(DI*1), X1
  11030. MOVOU X0, (AX)
  11031. MOVOU X1, -16(AX)(DI*1)
  11032. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  11033. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
  11034. MOVOU (R8), X0
  11035. MOVOU 16(R8), X1
  11036. MOVOU -32(R8)(DI*1), X2
  11037. MOVOU -16(R8)(DI*1), X3
  11038. MOVOU X0, (AX)
  11039. MOVOU X1, 16(AX)
  11040. MOVOU X2, -32(AX)(DI*1)
  11041. MOVOU X3, -16(AX)(DI*1)
  11042. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  11043. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_65through128:
  11044. MOVOU (R8), X0
  11045. MOVOU 16(R8), X1
  11046. MOVOU 32(R8), X2
  11047. MOVOU 48(R8), X3
  11048. MOVOU -64(R8)(DI*1), X12
  11049. MOVOU -48(R8)(DI*1), X13
  11050. MOVOU -32(R8)(DI*1), X14
  11051. MOVOU -16(R8)(DI*1), X15
  11052. MOVOU X0, (AX)
  11053. MOVOU X1, 16(AX)
  11054. MOVOU X2, 32(AX)
  11055. MOVOU X3, 48(AX)
  11056. MOVOU X12, -64(AX)(DI*1)
  11057. MOVOU X13, -48(AX)(DI*1)
  11058. MOVOU X14, -32(AX)(DI*1)
  11059. MOVOU X15, -16(AX)(DI*1)
  11060. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  11061. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_129through256:
  11062. MOVOU (R8), X0
  11063. MOVOU 16(R8), X1
  11064. MOVOU 32(R8), X2
  11065. MOVOU 48(R8), X3
  11066. MOVOU 64(R8), X4
  11067. MOVOU 80(R8), X5
  11068. MOVOU 96(R8), X6
  11069. MOVOU 112(R8), X7
  11070. MOVOU -128(R8)(DI*1), X8
  11071. MOVOU -112(R8)(DI*1), X9
  11072. MOVOU -96(R8)(DI*1), X10
  11073. MOVOU -80(R8)(DI*1), X11
  11074. MOVOU -64(R8)(DI*1), X12
  11075. MOVOU -48(R8)(DI*1), X13
  11076. MOVOU -32(R8)(DI*1), X14
  11077. MOVOU -16(R8)(DI*1), X15
  11078. MOVOU X0, (AX)
  11079. MOVOU X1, 16(AX)
  11080. MOVOU X2, 32(AX)
  11081. MOVOU X3, 48(AX)
  11082. MOVOU X4, 64(AX)
  11083. MOVOU X5, 80(AX)
  11084. MOVOU X6, 96(AX)
  11085. MOVOU X7, 112(AX)
  11086. MOVOU X8, -128(AX)(DI*1)
  11087. MOVOU X9, -112(AX)(DI*1)
  11088. MOVOU X10, -96(AX)(DI*1)
  11089. MOVOU X11, -80(AX)(DI*1)
  11090. MOVOU X12, -64(AX)(DI*1)
  11091. MOVOU X13, -48(AX)(DI*1)
  11092. MOVOU X14, -32(AX)(DI*1)
  11093. MOVOU X15, -16(AX)(DI*1)
  11094. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  11095. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_256through2048:
  11096. LEAQ -256(DI), DI
  11097. MOVOU (R8), X0
  11098. MOVOU 16(R8), X1
  11099. MOVOU 32(R8), X2
  11100. MOVOU 48(R8), X3
  11101. MOVOU 64(R8), X4
  11102. MOVOU 80(R8), X5
  11103. MOVOU 96(R8), X6
  11104. MOVOU 112(R8), X7
  11105. MOVOU 128(R8), X8
  11106. MOVOU 144(R8), X9
  11107. MOVOU 160(R8), X10
  11108. MOVOU 176(R8), X11
  11109. MOVOU 192(R8), X12
  11110. MOVOU 208(R8), X13
  11111. MOVOU 224(R8), X14
  11112. MOVOU 240(R8), X15
  11113. MOVOU X0, (AX)
  11114. MOVOU X1, 16(AX)
  11115. MOVOU X2, 32(AX)
  11116. MOVOU X3, 48(AX)
  11117. MOVOU X4, 64(AX)
  11118. MOVOU X5, 80(AX)
  11119. MOVOU X6, 96(AX)
  11120. MOVOU X7, 112(AX)
  11121. MOVOU X8, 128(AX)
  11122. MOVOU X9, 144(AX)
  11123. MOVOU X10, 160(AX)
  11124. MOVOU X11, 176(AX)
  11125. MOVOU X12, 192(AX)
  11126. MOVOU X13, 208(AX)
  11127. MOVOU X14, 224(AX)
  11128. MOVOU X15, 240(AX)
  11129. CMPQ DI, $0x00000100
  11130. LEAQ 256(R8), R8
  11131. LEAQ 256(AX), AX
  11132. JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_256through2048
  11133. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_tail
  11134. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
  11135. MOVQ BP, AX
  11136. emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
  11137. ADDL $0x05, CX
  11138. MOVL CX, BP
  11139. SUBL 16(SP), BP
  11140. MOVQ src_len+32(FP), DI
  11141. SUBL CX, DI
  11142. LEAQ (DX)(CX*1), R8
  11143. LEAQ (DX)(BP*1), BP
  11144. XORL R10, R10
  11145. CMPL DI, $0x08
  11146. JL matchlen_single_repeat_extend
  11147. matchlen_loopback_repeat_extend:
  11148. MOVQ (R8)(R10*1), R9
  11149. XORQ (BP)(R10*1), R9
  11150. TESTQ R9, R9
  11151. JZ matchlen_loop_repeat_extend
  11152. BSFQ R9, R9
  11153. SARQ $0x03, R9
  11154. LEAL (R10)(R9*1), R10
  11155. JMP repeat_extend_forward_end_encodeSnappyBlockAsm
  11156. matchlen_loop_repeat_extend:
  11157. LEAL -8(DI), DI
  11158. LEAL 8(R10), R10
  11159. CMPL DI, $0x08
  11160. JGE matchlen_loopback_repeat_extend
  11161. matchlen_single_repeat_extend:
  11162. TESTL DI, DI
  11163. JZ repeat_extend_forward_end_encodeSnappyBlockAsm
  11164. matchlen_single_loopback_repeat_extend:
  11165. MOVB (R8)(R10*1), R9
  11166. CMPB (BP)(R10*1), R9
  11167. JNE repeat_extend_forward_end_encodeSnappyBlockAsm
  11168. LEAL 1(R10), R10
  11169. DECL DI
  11170. JNZ matchlen_single_loopback_repeat_extend
  11171. repeat_extend_forward_end_encodeSnappyBlockAsm:
  11172. ADDL R10, CX
  11173. MOVL CX, BP
  11174. SUBL SI, BP
  11175. MOVL 16(SP), SI
  11176. CMPL SI, $0x00010000
  11177. JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
  11178. four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
  11179. CMPL BP, $0x40
  11180. JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
  11181. MOVB $0xff, (AX)
  11182. MOVL SI, 1(AX)
  11183. LEAL -64(BP), BP
  11184. ADDQ $0x05, AX
  11185. CMPL BP, $0x04
  11186. JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
  11187. JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm
  11188. four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
  11189. TESTL BP, BP
  11190. JZ repeat_end_emit_encodeSnappyBlockAsm
  11191. MOVB $0x03, BL
  11192. LEAL -4(BX)(BP*4), BP
  11193. MOVB BP, (AX)
  11194. MOVL SI, 1(AX)
  11195. ADDQ $0x05, AX
  11196. JMP repeat_end_emit_encodeSnappyBlockAsm
  11197. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
  11198. CMPL BP, $0x40
  11199. JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
  11200. MOVB $0xee, (AX)
  11201. MOVW SI, 1(AX)
  11202. LEAL -60(BP), BP
  11203. ADDQ $0x03, AX
  11204. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
  11205. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
  11206. CMPL BP, $0x0c
  11207. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
  11208. CMPL SI, $0x00000800
  11209. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
  11210. MOVB $0x01, BL
  11211. LEAL -16(BX)(BP*4), BP
  11212. MOVB SI, 1(AX)
  11213. SHRL $0x08, SI
  11214. SHLL $0x05, SI
  11215. ORL SI, BP
  11216. MOVB BP, (AX)
  11217. ADDQ $0x02, AX
  11218. JMP repeat_end_emit_encodeSnappyBlockAsm
  11219. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
  11220. MOVB $0x02, BL
  11221. LEAL -4(BX)(BP*4), BP
  11222. MOVB BP, (AX)
  11223. MOVW SI, 1(AX)
  11224. ADDQ $0x03, AX
  11225. repeat_end_emit_encodeSnappyBlockAsm:
  11226. MOVL CX, 12(SP)
  11227. CMPL CX, 8(SP)
  11228. JGE emit_remainder_encodeSnappyBlockAsm
  11229. JMP search_loop_encodeSnappyBlockAsm
  11230. no_repeat_found_encodeSnappyBlockAsm:
  11231. CMPL (DX)(BP*1), SI
  11232. JEQ candidate_match_encodeSnappyBlockAsm
  11233. SHRQ $0x08, SI
  11234. MOVL 24(SP)(R9*4), BP
  11235. LEAL 2(CX), R8
  11236. CMPL (DX)(DI*1), SI
  11237. JEQ candidate2_match_encodeSnappyBlockAsm
  11238. MOVL R8, 24(SP)(R9*4)
  11239. SHRQ $0x08, SI
  11240. CMPL (DX)(BP*1), SI
  11241. JEQ candidate3_match_encodeSnappyBlockAsm
  11242. MOVL 20(SP), CX
  11243. JMP search_loop_encodeSnappyBlockAsm
  11244. candidate3_match_encodeSnappyBlockAsm:
  11245. ADDL $0x02, CX
  11246. JMP candidate_match_encodeSnappyBlockAsm
  11247. candidate2_match_encodeSnappyBlockAsm:
  11248. MOVL R8, 24(SP)(R9*4)
  11249. INCL CX
  11250. MOVL DI, BP
  11251. candidate_match_encodeSnappyBlockAsm:
  11252. MOVL 12(SP), SI
  11253. TESTL BP, BP
  11254. JZ match_extend_back_end_encodeSnappyBlockAsm
  11255. match_extend_back_loop_encodeSnappyBlockAsm:
  11256. CMPL CX, SI
  11257. JLE match_extend_back_end_encodeSnappyBlockAsm
  11258. MOVB -1(DX)(BP*1), BL
  11259. MOVB -1(DX)(CX*1), DI
  11260. CMPB BL, DI
  11261. JNE match_extend_back_end_encodeSnappyBlockAsm
  11262. LEAL -1(CX), CX
  11263. DECL BP
  11264. JZ match_extend_back_end_encodeSnappyBlockAsm
  11265. JMP match_extend_back_loop_encodeSnappyBlockAsm
  11266. match_extend_back_end_encodeSnappyBlockAsm:
  11267. MOVL CX, SI
  11268. SUBL 12(SP), SI
  11269. LEAQ 4(AX)(SI*1), SI
  11270. CMPQ SI, (SP)
  11271. JL match_dst_size_check_encodeSnappyBlockAsm
  11272. MOVQ $0x00000000, ret+48(FP)
  11273. RET
  11274. match_dst_size_check_encodeSnappyBlockAsm:
  11275. MOVL CX, SI
  11276. MOVL 12(SP), DI
  11277. CMPL DI, SI
  11278. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm
  11279. MOVL SI, R8
  11280. MOVL SI, 12(SP)
  11281. LEAQ (DX)(DI*1), SI
  11282. SUBL DI, R8
  11283. MOVL R8, DI
  11284. SUBL $0x01, DI
  11285. JC emit_literal_done_match_emit_encodeSnappyBlockAsm
  11286. CMPL DI, $0x3c
  11287. JLT one_byte_match_emit_encodeSnappyBlockAsm
  11288. CMPL DI, $0x00000100
  11289. JLT two_bytes_match_emit_encodeSnappyBlockAsm
  11290. CMPL DI, $0x00010000
  11291. JLT three_bytes_match_emit_encodeSnappyBlockAsm
  11292. CMPL DI, $0x01000000
  11293. JLT four_bytes_match_emit_encodeSnappyBlockAsm
  11294. MOVB $0xfc, (AX)
  11295. MOVL DI, 1(AX)
  11296. ADDQ $0x05, AX
  11297. JMP memmove_match_emit_encodeSnappyBlockAsm
  11298. four_bytes_match_emit_encodeSnappyBlockAsm:
  11299. MOVL DI, R9
  11300. SHRL $0x10, R9
  11301. MOVB $0xf8, (AX)
  11302. MOVW DI, 1(AX)
  11303. MOVB R9, 3(AX)
  11304. ADDQ $0x04, AX
  11305. JMP memmove_match_emit_encodeSnappyBlockAsm
  11306. three_bytes_match_emit_encodeSnappyBlockAsm:
  11307. MOVB $0xf4, (AX)
  11308. MOVW DI, 1(AX)
  11309. ADDQ $0x03, AX
  11310. JMP memmove_match_emit_encodeSnappyBlockAsm
  11311. two_bytes_match_emit_encodeSnappyBlockAsm:
  11312. MOVB $0xf0, (AX)
  11313. MOVB DI, 1(AX)
  11314. ADDQ $0x02, AX
  11315. JMP memmove_match_emit_encodeSnappyBlockAsm
  11316. one_byte_match_emit_encodeSnappyBlockAsm:
  11317. SHLB $0x02, DI
  11318. MOVB DI, (AX)
  11319. ADDQ $0x01, AX
  11320. memmove_match_emit_encodeSnappyBlockAsm:
  11321. LEAQ (AX)(R8*1), DI
  11322. NOP
  11323. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_tail:
  11324. TESTQ R8, R8
  11325. JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11326. CMPQ R8, $0x02
  11327. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2
  11328. CMPQ R8, $0x04
  11329. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3
  11330. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4
  11331. CMPQ R8, $0x08
  11332. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_5through7
  11333. JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
  11334. CMPQ R8, $0x10
  11335. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_9through16
  11336. CMPQ R8, $0x20
  11337. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
  11338. CMPQ R8, $0x40
  11339. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64
  11340. CMPQ R8, $0x80
  11341. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_65through128
  11342. CMPQ R8, $0x00000100
  11343. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_129through256
  11344. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_256through2048
  11345. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2:
  11346. MOVB (SI), R9
  11347. MOVB -1(SI)(R8*1), SI
  11348. MOVB R9, (AX)
  11349. MOVB SI, -1(AX)(R8*1)
  11350. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11351. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4:
  11352. MOVL (SI), R9
  11353. MOVL R9, (AX)
  11354. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11355. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3:
  11356. MOVW (SI), R9
  11357. MOVB 2(SI), SI
  11358. MOVW R9, (AX)
  11359. MOVB SI, 2(AX)
  11360. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11361. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_5through7:
  11362. MOVL (SI), R9
  11363. MOVL -4(SI)(R8*1), SI
  11364. MOVL R9, (AX)
  11365. MOVL SI, -4(AX)(R8*1)
  11366. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11367. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
  11368. MOVQ (SI), R9
  11369. MOVQ R9, (AX)
  11370. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11371. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_9through16:
  11372. MOVQ (SI), R9
  11373. MOVQ -8(SI)(R8*1), SI
  11374. MOVQ R9, (AX)
  11375. MOVQ SI, -8(AX)(R8*1)
  11376. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11377. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
  11378. MOVOU (SI), X0
  11379. MOVOU -16(SI)(R8*1), X1
  11380. MOVOU X0, (AX)
  11381. MOVOU X1, -16(AX)(R8*1)
  11382. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11383. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
  11384. MOVOU (SI), X0
  11385. MOVOU 16(SI), X1
  11386. MOVOU -32(SI)(R8*1), X2
  11387. MOVOU -16(SI)(R8*1), X3
  11388. MOVOU X0, (AX)
  11389. MOVOU X1, 16(AX)
  11390. MOVOU X2, -32(AX)(R8*1)
  11391. MOVOU X3, -16(AX)(R8*1)
  11392. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11393. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_65through128:
  11394. MOVOU (SI), X0
  11395. MOVOU 16(SI), X1
  11396. MOVOU 32(SI), X2
  11397. MOVOU 48(SI), X3
  11398. MOVOU -64(SI)(R8*1), X12
  11399. MOVOU -48(SI)(R8*1), X13
  11400. MOVOU -32(SI)(R8*1), X14
  11401. MOVOU -16(SI)(R8*1), X15
  11402. MOVOU X0, (AX)
  11403. MOVOU X1, 16(AX)
  11404. MOVOU X2, 32(AX)
  11405. MOVOU X3, 48(AX)
  11406. MOVOU X12, -64(AX)(R8*1)
  11407. MOVOU X13, -48(AX)(R8*1)
  11408. MOVOU X14, -32(AX)(R8*1)
  11409. MOVOU X15, -16(AX)(R8*1)
  11410. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11411. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_129through256:
  11412. MOVOU (SI), X0
  11413. MOVOU 16(SI), X1
  11414. MOVOU 32(SI), X2
  11415. MOVOU 48(SI), X3
  11416. MOVOU 64(SI), X4
  11417. MOVOU 80(SI), X5
  11418. MOVOU 96(SI), X6
  11419. MOVOU 112(SI), X7
  11420. MOVOU -128(SI)(R8*1), X8
  11421. MOVOU -112(SI)(R8*1), X9
  11422. MOVOU -96(SI)(R8*1), X10
  11423. MOVOU -80(SI)(R8*1), X11
  11424. MOVOU -64(SI)(R8*1), X12
  11425. MOVOU -48(SI)(R8*1), X13
  11426. MOVOU -32(SI)(R8*1), X14
  11427. MOVOU -16(SI)(R8*1), X15
  11428. MOVOU X0, (AX)
  11429. MOVOU X1, 16(AX)
  11430. MOVOU X2, 32(AX)
  11431. MOVOU X3, 48(AX)
  11432. MOVOU X4, 64(AX)
  11433. MOVOU X5, 80(AX)
  11434. MOVOU X6, 96(AX)
  11435. MOVOU X7, 112(AX)
  11436. MOVOU X8, -128(AX)(R8*1)
  11437. MOVOU X9, -112(AX)(R8*1)
  11438. MOVOU X10, -96(AX)(R8*1)
  11439. MOVOU X11, -80(AX)(R8*1)
  11440. MOVOU X12, -64(AX)(R8*1)
  11441. MOVOU X13, -48(AX)(R8*1)
  11442. MOVOU X14, -32(AX)(R8*1)
  11443. MOVOU X15, -16(AX)(R8*1)
  11444. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  11445. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_256through2048:
  11446. LEAQ -256(R8), R8
  11447. MOVOU (SI), X0
  11448. MOVOU 16(SI), X1
  11449. MOVOU 32(SI), X2
  11450. MOVOU 48(SI), X3
  11451. MOVOU 64(SI), X4
  11452. MOVOU 80(SI), X5
  11453. MOVOU 96(SI), X6
  11454. MOVOU 112(SI), X7
  11455. MOVOU 128(SI), X8
  11456. MOVOU 144(SI), X9
  11457. MOVOU 160(SI), X10
  11458. MOVOU 176(SI), X11
  11459. MOVOU 192(SI), X12
  11460. MOVOU 208(SI), X13
  11461. MOVOU 224(SI), X14
  11462. MOVOU 240(SI), X15
  11463. MOVOU X0, (AX)
  11464. MOVOU X1, 16(AX)
  11465. MOVOU X2, 32(AX)
  11466. MOVOU X3, 48(AX)
  11467. MOVOU X4, 64(AX)
  11468. MOVOU X5, 80(AX)
  11469. MOVOU X6, 96(AX)
  11470. MOVOU X7, 112(AX)
  11471. MOVOU X8, 128(AX)
  11472. MOVOU X9, 144(AX)
  11473. MOVOU X10, 160(AX)
  11474. MOVOU X11, 176(AX)
  11475. MOVOU X12, 192(AX)
  11476. MOVOU X13, 208(AX)
  11477. MOVOU X14, 224(AX)
  11478. MOVOU X15, 240(AX)
  11479. CMPQ R8, $0x00000100
  11480. LEAQ 256(SI), SI
  11481. LEAQ 256(AX), AX
  11482. JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_256through2048
  11483. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_tail
  11484. memmove_end_copy_match_emit_encodeSnappyBlockAsm:
  11485. MOVQ DI, AX
  11486. emit_literal_done_match_emit_encodeSnappyBlockAsm:
  11487. match_nolit_loop_encodeSnappyBlockAsm:
  11488. MOVL CX, SI
  11489. SUBL BP, SI
  11490. MOVL SI, 16(SP)
  11491. ADDL $0x04, CX
  11492. ADDL $0x04, BP
  11493. MOVQ src_len+32(FP), SI
  11494. SUBL CX, SI
  11495. LEAQ (DX)(CX*1), DI
  11496. LEAQ (DX)(BP*1), BP
  11497. XORL R9, R9
  11498. CMPL SI, $0x08
  11499. JL matchlen_single_match_nolit_encodeSnappyBlockAsm
  11500. matchlen_loopback_match_nolit_encodeSnappyBlockAsm:
  11501. MOVQ (DI)(R9*1), R8
  11502. XORQ (BP)(R9*1), R8
  11503. TESTQ R8, R8
  11504. JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm
  11505. BSFQ R8, R8
  11506. SARQ $0x03, R8
  11507. LEAL (R9)(R8*1), R9
  11508. JMP match_nolit_end_encodeSnappyBlockAsm
  11509. matchlen_loop_match_nolit_encodeSnappyBlockAsm:
  11510. LEAL -8(SI), SI
  11511. LEAL 8(R9), R9
  11512. CMPL SI, $0x08
  11513. JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm
  11514. matchlen_single_match_nolit_encodeSnappyBlockAsm:
  11515. TESTL SI, SI
  11516. JZ match_nolit_end_encodeSnappyBlockAsm
  11517. matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm:
  11518. MOVB (DI)(R9*1), R8
  11519. CMPB (BP)(R9*1), R8
  11520. JNE match_nolit_end_encodeSnappyBlockAsm
  11521. LEAL 1(R9), R9
  11522. DECL SI
  11523. JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm
  11524. match_nolit_end_encodeSnappyBlockAsm:
  11525. ADDL R9, CX
  11526. MOVL 16(SP), BP
  11527. ADDL $0x04, R9
  11528. CMPL BP, $0x00010000
  11529. JL two_byte_offset_match_nolit_encodeSnappyBlockAsm
  11530. four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
  11531. CMPL R9, $0x40
  11532. JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm
  11533. MOVB $0xff, (AX)
  11534. MOVL BP, 1(AX)
  11535. LEAL -64(R9), R9
  11536. ADDQ $0x05, AX
  11537. CMPL R9, $0x04
  11538. JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm
  11539. JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm
  11540. four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
  11541. TESTL R9, R9
  11542. JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm
  11543. MOVB $0x03, BL
  11544. LEAL -4(BX)(R9*4), R9
  11545. MOVB R9, (AX)
  11546. MOVL BP, 1(AX)
  11547. ADDQ $0x05, AX
  11548. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm
  11549. two_byte_offset_match_nolit_encodeSnappyBlockAsm:
  11550. CMPL R9, $0x40
  11551. JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
  11552. MOVB $0xee, (AX)
  11553. MOVW BP, 1(AX)
  11554. LEAL -60(R9), R9
  11555. ADDQ $0x03, AX
  11556. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm
  11557. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
  11558. CMPL R9, $0x0c
  11559. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm
  11560. CMPL BP, $0x00000800
  11561. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm
  11562. MOVB $0x01, BL
  11563. LEAL -16(BX)(R9*4), R9
  11564. MOVB BP, 1(AX)
  11565. SHRL $0x08, BP
  11566. SHLL $0x05, BP
  11567. ORL BP, R9
  11568. MOVB R9, (AX)
  11569. ADDQ $0x02, AX
  11570. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm
  11571. emit_copy_three_match_nolit_encodeSnappyBlockAsm:
  11572. MOVB $0x02, BL
  11573. LEAL -4(BX)(R9*4), R9
  11574. MOVB R9, (AX)
  11575. MOVW BP, 1(AX)
  11576. ADDQ $0x03, AX
  11577. match_nolit_emitcopy_end_encodeSnappyBlockAsm:
  11578. MOVL CX, 12(SP)
  11579. CMPL CX, 8(SP)
  11580. JGE emit_remainder_encodeSnappyBlockAsm
  11581. CMPQ AX, (SP)
  11582. JL match_nolit_dst_ok_encodeSnappyBlockAsm
  11583. MOVQ $0x00000000, ret+48(FP)
  11584. RET
  11585. match_nolit_dst_ok_encodeSnappyBlockAsm:
  11586. MOVQ -2(DX)(CX*1), SI
  11587. MOVQ $0x0000cf1bbcdcbf9b, BP
  11588. MOVQ SI, DI
  11589. SHRQ $0x10, SI
  11590. MOVQ SI, R8
  11591. SHLQ $0x10, DI
  11592. IMULQ BP, DI
  11593. SHRQ $0x32, DI
  11594. SHLQ $0x10, R8
  11595. IMULQ BP, R8
  11596. SHRQ $0x32, R8
  11597. LEAL -2(CX), R9
  11598. MOVL 24(SP)(R8*4), BP
  11599. MOVL R9, 24(SP)(DI*4)
  11600. MOVL CX, 24(SP)(R8*4)
  11601. CMPL (DX)(BP*1), SI
  11602. JEQ match_nolit_loop_encodeSnappyBlockAsm
  11603. INCL CX
  11604. JMP search_loop_encodeSnappyBlockAsm
  11605. emit_remainder_encodeSnappyBlockAsm:
  11606. MOVQ src_len+32(FP), CX
  11607. SUBL 12(SP), CX
  11608. LEAQ 4(AX)(CX*1), CX
  11609. CMPQ CX, (SP)
  11610. JL emit_remainder_ok_encodeSnappyBlockAsm
  11611. MOVQ $0x00000000, ret+48(FP)
  11612. RET
  11613. emit_remainder_ok_encodeSnappyBlockAsm:
  11614. MOVQ src_len+32(FP), CX
  11615. MOVL 12(SP), BX
  11616. CMPL BX, CX
  11617. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm
  11618. MOVL CX, BP
  11619. MOVL CX, 12(SP)
  11620. LEAQ (DX)(BX*1), CX
  11621. SUBL BX, BP
  11622. MOVL BP, DX
  11623. SUBL $0x01, DX
  11624. JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm
  11625. CMPL DX, $0x3c
  11626. JLT one_byte_emit_remainder_encodeSnappyBlockAsm
  11627. CMPL DX, $0x00000100
  11628. JLT two_bytes_emit_remainder_encodeSnappyBlockAsm
  11629. CMPL DX, $0x00010000
  11630. JLT three_bytes_emit_remainder_encodeSnappyBlockAsm
  11631. CMPL DX, $0x01000000
  11632. JLT four_bytes_emit_remainder_encodeSnappyBlockAsm
  11633. MOVB $0xfc, (AX)
  11634. MOVL DX, 1(AX)
  11635. ADDQ $0x05, AX
  11636. JMP memmove_emit_remainder_encodeSnappyBlockAsm
  11637. four_bytes_emit_remainder_encodeSnappyBlockAsm:
  11638. MOVL DX, BX
  11639. SHRL $0x10, BX
  11640. MOVB $0xf8, (AX)
  11641. MOVW DX, 1(AX)
  11642. MOVB BL, 3(AX)
  11643. ADDQ $0x04, AX
  11644. JMP memmove_emit_remainder_encodeSnappyBlockAsm
  11645. three_bytes_emit_remainder_encodeSnappyBlockAsm:
  11646. MOVB $0xf4, (AX)
  11647. MOVW DX, 1(AX)
  11648. ADDQ $0x03, AX
  11649. JMP memmove_emit_remainder_encodeSnappyBlockAsm
  11650. two_bytes_emit_remainder_encodeSnappyBlockAsm:
  11651. MOVB $0xf0, (AX)
  11652. MOVB DL, 1(AX)
  11653. ADDQ $0x02, AX
  11654. JMP memmove_emit_remainder_encodeSnappyBlockAsm
  11655. one_byte_emit_remainder_encodeSnappyBlockAsm:
  11656. SHLB $0x02, DL
  11657. MOVB DL, (AX)
  11658. ADDQ $0x01, AX
  11659. memmove_emit_remainder_encodeSnappyBlockAsm:
  11660. LEAQ (AX)(BP*1), DX
  11661. MOVL BP, BX
  11662. NOP
  11663. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_tail:
  11664. TESTQ BX, BX
  11665. JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11666. CMPQ BX, $0x02
  11667. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2
  11668. CMPQ BX, $0x04
  11669. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3
  11670. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4
  11671. CMPQ BX, $0x08
  11672. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_5through7
  11673. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8
  11674. CMPQ BX, $0x10
  11675. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_9through16
  11676. CMPQ BX, $0x20
  11677. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32
  11678. CMPQ BX, $0x40
  11679. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64
  11680. CMPQ BX, $0x80
  11681. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_65through128
  11682. CMPQ BX, $0x00000100
  11683. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_129through256
  11684. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_256through2048
  11685. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2:
  11686. MOVB (CX), BP
  11687. MOVB -1(CX)(BX*1), CL
  11688. MOVB BP, (AX)
  11689. MOVB CL, -1(AX)(BX*1)
  11690. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11691. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4:
  11692. MOVL (CX), BP
  11693. MOVL BP, (AX)
  11694. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11695. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3:
  11696. MOVW (CX), BP
  11697. MOVB 2(CX), CL
  11698. MOVW BP, (AX)
  11699. MOVB CL, 2(AX)
  11700. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11701. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_5through7:
  11702. MOVL (CX), BP
  11703. MOVL -4(CX)(BX*1), CX
  11704. MOVL BP, (AX)
  11705. MOVL CX, -4(AX)(BX*1)
  11706. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11707. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8:
  11708. MOVQ (CX), BP
  11709. MOVQ BP, (AX)
  11710. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11711. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_9through16:
  11712. MOVQ (CX), BP
  11713. MOVQ -8(CX)(BX*1), CX
  11714. MOVQ BP, (AX)
  11715. MOVQ CX, -8(AX)(BX*1)
  11716. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11717. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
  11718. MOVOU (CX), X0
  11719. MOVOU -16(CX)(BX*1), X1
  11720. MOVOU X0, (AX)
  11721. MOVOU X1, -16(AX)(BX*1)
  11722. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11723. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
  11724. MOVOU (CX), X0
  11725. MOVOU 16(CX), X1
  11726. MOVOU -32(CX)(BX*1), X2
  11727. MOVOU -16(CX)(BX*1), X3
  11728. MOVOU X0, (AX)
  11729. MOVOU X1, 16(AX)
  11730. MOVOU X2, -32(AX)(BX*1)
  11731. MOVOU X3, -16(AX)(BX*1)
  11732. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11733. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_65through128:
  11734. MOVOU (CX), X0
  11735. MOVOU 16(CX), X1
  11736. MOVOU 32(CX), X2
  11737. MOVOU 48(CX), X3
  11738. MOVOU -64(CX)(BX*1), X12
  11739. MOVOU -48(CX)(BX*1), X13
  11740. MOVOU -32(CX)(BX*1), X14
  11741. MOVOU -16(CX)(BX*1), X15
  11742. MOVOU X0, (AX)
  11743. MOVOU X1, 16(AX)
  11744. MOVOU X2, 32(AX)
  11745. MOVOU X3, 48(AX)
  11746. MOVOU X12, -64(AX)(BX*1)
  11747. MOVOU X13, -48(AX)(BX*1)
  11748. MOVOU X14, -32(AX)(BX*1)
  11749. MOVOU X15, -16(AX)(BX*1)
  11750. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11751. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_129through256:
  11752. MOVOU (CX), X0
  11753. MOVOU 16(CX), X1
  11754. MOVOU 32(CX), X2
  11755. MOVOU 48(CX), X3
  11756. MOVOU 64(CX), X4
  11757. MOVOU 80(CX), X5
  11758. MOVOU 96(CX), X6
  11759. MOVOU 112(CX), X7
  11760. MOVOU -128(CX)(BX*1), X8
  11761. MOVOU -112(CX)(BX*1), X9
  11762. MOVOU -96(CX)(BX*1), X10
  11763. MOVOU -80(CX)(BX*1), X11
  11764. MOVOU -64(CX)(BX*1), X12
  11765. MOVOU -48(CX)(BX*1), X13
  11766. MOVOU -32(CX)(BX*1), X14
  11767. MOVOU -16(CX)(BX*1), X15
  11768. MOVOU X0, (AX)
  11769. MOVOU X1, 16(AX)
  11770. MOVOU X2, 32(AX)
  11771. MOVOU X3, 48(AX)
  11772. MOVOU X4, 64(AX)
  11773. MOVOU X5, 80(AX)
  11774. MOVOU X6, 96(AX)
  11775. MOVOU X7, 112(AX)
  11776. MOVOU X8, -128(AX)(BX*1)
  11777. MOVOU X9, -112(AX)(BX*1)
  11778. MOVOU X10, -96(AX)(BX*1)
  11779. MOVOU X11, -80(AX)(BX*1)
  11780. MOVOU X12, -64(AX)(BX*1)
  11781. MOVOU X13, -48(AX)(BX*1)
  11782. MOVOU X14, -32(AX)(BX*1)
  11783. MOVOU X15, -16(AX)(BX*1)
  11784. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  11785. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_256through2048:
  11786. LEAQ -256(BX), BX
  11787. MOVOU (CX), X0
  11788. MOVOU 16(CX), X1
  11789. MOVOU 32(CX), X2
  11790. MOVOU 48(CX), X3
  11791. MOVOU 64(CX), X4
  11792. MOVOU 80(CX), X5
  11793. MOVOU 96(CX), X6
  11794. MOVOU 112(CX), X7
  11795. MOVOU 128(CX), X8
  11796. MOVOU 144(CX), X9
  11797. MOVOU 160(CX), X10
  11798. MOVOU 176(CX), X11
  11799. MOVOU 192(CX), X12
  11800. MOVOU 208(CX), X13
  11801. MOVOU 224(CX), X14
  11802. MOVOU 240(CX), X15
  11803. MOVOU X0, (AX)
  11804. MOVOU X1, 16(AX)
  11805. MOVOU X2, 32(AX)
  11806. MOVOU X3, 48(AX)
  11807. MOVOU X4, 64(AX)
  11808. MOVOU X5, 80(AX)
  11809. MOVOU X6, 96(AX)
  11810. MOVOU X7, 112(AX)
  11811. MOVOU X8, 128(AX)
  11812. MOVOU X9, 144(AX)
  11813. MOVOU X10, 160(AX)
  11814. MOVOU X11, 176(AX)
  11815. MOVOU X12, 192(AX)
  11816. MOVOU X13, 208(AX)
  11817. MOVOU X14, 224(AX)
  11818. MOVOU X15, 240(AX)
  11819. CMPQ BX, $0x00000100
  11820. LEAQ 256(CX), CX
  11821. LEAQ 256(AX), AX
  11822. JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_256through2048
  11823. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_tail
  11824. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
  11825. MOVQ DX, AX
  11826. emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
  11827. MOVQ dst_base+0(FP), CX
  11828. SUBQ CX, AX
  11829. MOVQ AX, ret+48(FP)
  11830. RET
  11831. // func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
  11832. // Requires: SSE2
  11833. TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56
  11834. MOVQ dst_base+0(FP), AX
  11835. MOVQ $0x00000080, CX
  11836. LEAQ 24(SP), DX
  11837. PXOR X0, X0
  11838. zero_loop_encodeSnappyBlockAsm12B:
  11839. MOVOU X0, (DX)
  11840. MOVOU X0, 16(DX)
  11841. MOVOU X0, 32(DX)
  11842. MOVOU X0, 48(DX)
  11843. MOVOU X0, 64(DX)
  11844. MOVOU X0, 80(DX)
  11845. MOVOU X0, 96(DX)
  11846. MOVOU X0, 112(DX)
  11847. ADDQ $0x80, DX
  11848. DECQ CX
  11849. JNZ zero_loop_encodeSnappyBlockAsm12B
  11850. MOVL $0x00000000, 12(SP)
  11851. MOVQ src_len+32(FP), CX
  11852. LEAQ -5(CX), DX
  11853. LEAQ -8(CX), BP
  11854. MOVL BP, 8(SP)
  11855. SHRQ $0x05, CX
  11856. SUBL CX, DX
  11857. LEAQ (AX)(DX*1), DX
  11858. MOVQ DX, (SP)
  11859. MOVL $0x00000001, CX
  11860. MOVL CX, 16(SP)
  11861. MOVQ src_base+24(FP), DX
  11862. search_loop_encodeSnappyBlockAsm12B:
  11863. MOVQ (DX)(CX*1), SI
  11864. MOVL CX, BP
  11865. SUBL 12(SP), BP
  11866. SHRL $0x05, BP
  11867. LEAL 4(CX)(BP*1), BP
  11868. MOVL 8(SP), DI
  11869. CMPL BP, DI
  11870. JGT emit_remainder_encodeSnappyBlockAsm12B
  11871. MOVL BP, 20(SP)
  11872. MOVQ $0x000000cf1bbcdcbb, R8
  11873. MOVQ SI, R9
  11874. MOVQ SI, R10
  11875. SHRQ $0x08, R10
  11876. SHLQ $0x18, R9
  11877. IMULQ R8, R9
  11878. SHRQ $0x34, R9
  11879. SHLQ $0x18, R10
  11880. IMULQ R8, R10
  11881. SHRQ $0x34, R10
  11882. MOVL 24(SP)(R9*4), BP
  11883. MOVL 24(SP)(R10*4), DI
  11884. MOVL CX, 24(SP)(R9*4)
  11885. LEAL 1(CX), R9
  11886. MOVL R9, 24(SP)(R10*4)
  11887. MOVQ SI, R9
  11888. SHRQ $0x10, R9
  11889. SHLQ $0x18, R9
  11890. IMULQ R8, R9
  11891. SHRQ $0x34, R9
  11892. MOVL CX, R8
  11893. SUBL 16(SP), R8
  11894. MOVL 1(DX)(R8*1), R10
  11895. MOVQ SI, R8
  11896. SHRQ $0x08, R8
  11897. CMPL R8, R10
  11898. JNE no_repeat_found_encodeSnappyBlockAsm12B
  11899. LEAL 1(CX), SI
  11900. MOVL 12(SP), BP
  11901. MOVL SI, DI
  11902. SUBL 16(SP), DI
  11903. JZ repeat_extend_back_end_encodeSnappyBlockAsm12B
  11904. repeat_extend_back_loop_encodeSnappyBlockAsm12B:
  11905. CMPL SI, BP
  11906. JLE repeat_extend_back_end_encodeSnappyBlockAsm12B
  11907. MOVB -1(DX)(DI*1), BL
  11908. MOVB -1(DX)(SI*1), R8
  11909. CMPB BL, R8
  11910. JNE repeat_extend_back_end_encodeSnappyBlockAsm12B
  11911. LEAL -1(SI), SI
  11912. DECL DI
  11913. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B
  11914. repeat_extend_back_end_encodeSnappyBlockAsm12B:
  11915. MOVL 12(SP), BP
  11916. CMPL BP, SI
  11917. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
  11918. MOVL SI, DI
  11919. MOVL SI, 12(SP)
  11920. LEAQ (DX)(BP*1), R8
  11921. SUBL BP, DI
  11922. MOVL DI, BP
  11923. SUBL $0x01, BP
  11924. JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
  11925. CMPL BP, $0x3c
  11926. JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B
  11927. CMPL BP, $0x00000100
  11928. JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B
  11929. CMPL BP, $0x00010000
  11930. JLT three_bytes_repeat_emit_encodeSnappyBlockAsm12B
  11931. CMPL BP, $0x01000000
  11932. JLT four_bytes_repeat_emit_encodeSnappyBlockAsm12B
  11933. MOVB $0xfc, (AX)
  11934. MOVL BP, 1(AX)
  11935. ADDQ $0x05, AX
  11936. JMP memmove_repeat_emit_encodeSnappyBlockAsm12B
  11937. four_bytes_repeat_emit_encodeSnappyBlockAsm12B:
  11938. MOVL BP, R9
  11939. SHRL $0x10, R9
  11940. MOVB $0xf8, (AX)
  11941. MOVW BP, 1(AX)
  11942. MOVB R9, 3(AX)
  11943. ADDQ $0x04, AX
  11944. JMP memmove_repeat_emit_encodeSnappyBlockAsm12B
  11945. three_bytes_repeat_emit_encodeSnappyBlockAsm12B:
  11946. MOVB $0xf4, (AX)
  11947. MOVW BP, 1(AX)
  11948. ADDQ $0x03, AX
  11949. JMP memmove_repeat_emit_encodeSnappyBlockAsm12B
  11950. two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
  11951. MOVB $0xf0, (AX)
  11952. MOVB BP, 1(AX)
  11953. ADDQ $0x02, AX
  11954. JMP memmove_repeat_emit_encodeSnappyBlockAsm12B
  11955. one_byte_repeat_emit_encodeSnappyBlockAsm12B:
  11956. SHLB $0x02, BP
  11957. MOVB BP, (AX)
  11958. ADDQ $0x01, AX
  11959. memmove_repeat_emit_encodeSnappyBlockAsm12B:
  11960. LEAQ (AX)(DI*1), BP
  11961. NOP
  11962. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_tail:
  11963. TESTQ DI, DI
  11964. JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  11965. CMPQ DI, $0x02
  11966. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2
  11967. CMPQ DI, $0x04
  11968. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3
  11969. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4
  11970. CMPQ DI, $0x08
  11971. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_5through7
  11972. JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8
  11973. CMPQ DI, $0x10
  11974. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_9through16
  11975. CMPQ DI, $0x20
  11976. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
  11977. CMPQ DI, $0x40
  11978. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
  11979. CMPQ DI, $0x80
  11980. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_65through128
  11981. CMPQ DI, $0x00000100
  11982. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_129through256
  11983. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048
  11984. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2:
  11985. MOVB (R8), R9
  11986. MOVB -1(R8)(DI*1), R8
  11987. MOVB R9, (AX)
  11988. MOVB R8, -1(AX)(DI*1)
  11989. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  11990. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4:
  11991. MOVL (R8), R9
  11992. MOVL R9, (AX)
  11993. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  11994. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3:
  11995. MOVW (R8), R9
  11996. MOVB 2(R8), R8
  11997. MOVW R9, (AX)
  11998. MOVB R8, 2(AX)
  11999. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12000. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_5through7:
  12001. MOVL (R8), R9
  12002. MOVL -4(R8)(DI*1), R8
  12003. MOVL R9, (AX)
  12004. MOVL R8, -4(AX)(DI*1)
  12005. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12006. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8:
  12007. MOVQ (R8), R9
  12008. MOVQ R9, (AX)
  12009. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12010. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_9through16:
  12011. MOVQ (R8), R9
  12012. MOVQ -8(R8)(DI*1), R8
  12013. MOVQ R9, (AX)
  12014. MOVQ R8, -8(AX)(DI*1)
  12015. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12016. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
  12017. MOVOU (R8), X0
  12018. MOVOU -16(R8)(DI*1), X1
  12019. MOVOU X0, (AX)
  12020. MOVOU X1, -16(AX)(DI*1)
  12021. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12022. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
  12023. MOVOU (R8), X0
  12024. MOVOU 16(R8), X1
  12025. MOVOU -32(R8)(DI*1), X2
  12026. MOVOU -16(R8)(DI*1), X3
  12027. MOVOU X0, (AX)
  12028. MOVOU X1, 16(AX)
  12029. MOVOU X2, -32(AX)(DI*1)
  12030. MOVOU X3, -16(AX)(DI*1)
  12031. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12032. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_65through128:
  12033. MOVOU (R8), X0
  12034. MOVOU 16(R8), X1
  12035. MOVOU 32(R8), X2
  12036. MOVOU 48(R8), X3
  12037. MOVOU -64(R8)(DI*1), X12
  12038. MOVOU -48(R8)(DI*1), X13
  12039. MOVOU -32(R8)(DI*1), X14
  12040. MOVOU -16(R8)(DI*1), X15
  12041. MOVOU X0, (AX)
  12042. MOVOU X1, 16(AX)
  12043. MOVOU X2, 32(AX)
  12044. MOVOU X3, 48(AX)
  12045. MOVOU X12, -64(AX)(DI*1)
  12046. MOVOU X13, -48(AX)(DI*1)
  12047. MOVOU X14, -32(AX)(DI*1)
  12048. MOVOU X15, -16(AX)(DI*1)
  12049. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12050. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_129through256:
  12051. MOVOU (R8), X0
  12052. MOVOU 16(R8), X1
  12053. MOVOU 32(R8), X2
  12054. MOVOU 48(R8), X3
  12055. MOVOU 64(R8), X4
  12056. MOVOU 80(R8), X5
  12057. MOVOU 96(R8), X6
  12058. MOVOU 112(R8), X7
  12059. MOVOU -128(R8)(DI*1), X8
  12060. MOVOU -112(R8)(DI*1), X9
  12061. MOVOU -96(R8)(DI*1), X10
  12062. MOVOU -80(R8)(DI*1), X11
  12063. MOVOU -64(R8)(DI*1), X12
  12064. MOVOU -48(R8)(DI*1), X13
  12065. MOVOU -32(R8)(DI*1), X14
  12066. MOVOU -16(R8)(DI*1), X15
  12067. MOVOU X0, (AX)
  12068. MOVOU X1, 16(AX)
  12069. MOVOU X2, 32(AX)
  12070. MOVOU X3, 48(AX)
  12071. MOVOU X4, 64(AX)
  12072. MOVOU X5, 80(AX)
  12073. MOVOU X6, 96(AX)
  12074. MOVOU X7, 112(AX)
  12075. MOVOU X8, -128(AX)(DI*1)
  12076. MOVOU X9, -112(AX)(DI*1)
  12077. MOVOU X10, -96(AX)(DI*1)
  12078. MOVOU X11, -80(AX)(DI*1)
  12079. MOVOU X12, -64(AX)(DI*1)
  12080. MOVOU X13, -48(AX)(DI*1)
  12081. MOVOU X14, -32(AX)(DI*1)
  12082. MOVOU X15, -16(AX)(DI*1)
  12083. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  12084. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048:
  12085. LEAQ -256(DI), DI
  12086. MOVOU (R8), X0
  12087. MOVOU 16(R8), X1
  12088. MOVOU 32(R8), X2
  12089. MOVOU 48(R8), X3
  12090. MOVOU 64(R8), X4
  12091. MOVOU 80(R8), X5
  12092. MOVOU 96(R8), X6
  12093. MOVOU 112(R8), X7
  12094. MOVOU 128(R8), X8
  12095. MOVOU 144(R8), X9
  12096. MOVOU 160(R8), X10
  12097. MOVOU 176(R8), X11
  12098. MOVOU 192(R8), X12
  12099. MOVOU 208(R8), X13
  12100. MOVOU 224(R8), X14
  12101. MOVOU 240(R8), X15
  12102. MOVOU X0, (AX)
  12103. MOVOU X1, 16(AX)
  12104. MOVOU X2, 32(AX)
  12105. MOVOU X3, 48(AX)
  12106. MOVOU X4, 64(AX)
  12107. MOVOU X5, 80(AX)
  12108. MOVOU X6, 96(AX)
  12109. MOVOU X7, 112(AX)
  12110. MOVOU X8, 128(AX)
  12111. MOVOU X9, 144(AX)
  12112. MOVOU X10, 160(AX)
  12113. MOVOU X11, 176(AX)
  12114. MOVOU X12, 192(AX)
  12115. MOVOU X13, 208(AX)
  12116. MOVOU X14, 224(AX)
  12117. MOVOU X15, 240(AX)
  12118. CMPQ DI, $0x00000100
  12119. LEAQ 256(R8), R8
  12120. LEAQ 256(AX), AX
  12121. JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048
  12122. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_tail
  12123. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
  12124. MOVQ BP, AX
  12125. emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
  12126. ADDL $0x05, CX
  12127. MOVL CX, BP
  12128. SUBL 16(SP), BP
  12129. MOVQ src_len+32(FP), DI
  12130. SUBL CX, DI
  12131. LEAQ (DX)(CX*1), R8
  12132. LEAQ (DX)(BP*1), BP
  12133. XORL R10, R10
  12134. CMPL DI, $0x08
  12135. JL matchlen_single_repeat_extend
  12136. matchlen_loopback_repeat_extend:
  12137. MOVQ (R8)(R10*1), R9
  12138. XORQ (BP)(R10*1), R9
  12139. TESTQ R9, R9
  12140. JZ matchlen_loop_repeat_extend
  12141. BSFQ R9, R9
  12142. SARQ $0x03, R9
  12143. LEAL (R10)(R9*1), R10
  12144. JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B
  12145. matchlen_loop_repeat_extend:
  12146. LEAL -8(DI), DI
  12147. LEAL 8(R10), R10
  12148. CMPL DI, $0x08
  12149. JGE matchlen_loopback_repeat_extend
  12150. matchlen_single_repeat_extend:
  12151. TESTL DI, DI
  12152. JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B
  12153. matchlen_single_loopback_repeat_extend:
  12154. MOVB (R8)(R10*1), R9
  12155. CMPB (BP)(R10*1), R9
  12156. JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B
  12157. LEAL 1(R10), R10
  12158. DECL DI
  12159. JNZ matchlen_single_loopback_repeat_extend
  12160. repeat_extend_forward_end_encodeSnappyBlockAsm12B:
  12161. ADDL R10, CX
  12162. MOVL CX, BP
  12163. SUBL SI, BP
  12164. MOVL 16(SP), SI
  12165. CMPL SI, $0x00010000
  12166. JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
  12167. four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12B:
  12168. CMPL BP, $0x40
  12169. JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B
  12170. MOVB $0xff, (AX)
  12171. MOVL SI, 1(AX)
  12172. LEAL -64(BP), BP
  12173. ADDQ $0x05, AX
  12174. CMPL BP, $0x04
  12175. JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B
  12176. JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12B
  12177. four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B:
  12178. TESTL BP, BP
  12179. JZ repeat_end_emit_encodeSnappyBlockAsm12B
  12180. MOVB $0x03, BL
  12181. LEAL -4(BX)(BP*4), BP
  12182. MOVB BP, (AX)
  12183. MOVL SI, 1(AX)
  12184. ADDQ $0x05, AX
  12185. JMP repeat_end_emit_encodeSnappyBlockAsm12B
  12186. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
  12187. CMPL BP, $0x40
  12188. JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
  12189. MOVB $0xee, (AX)
  12190. MOVW SI, 1(AX)
  12191. LEAL -60(BP), BP
  12192. ADDQ $0x03, AX
  12193. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
  12194. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
  12195. CMPL BP, $0x0c
  12196. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
  12197. CMPL SI, $0x00000800
  12198. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
  12199. MOVB $0x01, BL
  12200. LEAL -16(BX)(BP*4), BP
  12201. MOVB SI, 1(AX)
  12202. SHRL $0x08, SI
  12203. SHLL $0x05, SI
  12204. ORL SI, BP
  12205. MOVB BP, (AX)
  12206. ADDQ $0x02, AX
  12207. JMP repeat_end_emit_encodeSnappyBlockAsm12B
  12208. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
  12209. MOVB $0x02, BL
  12210. LEAL -4(BX)(BP*4), BP
  12211. MOVB BP, (AX)
  12212. MOVW SI, 1(AX)
  12213. ADDQ $0x03, AX
  12214. repeat_end_emit_encodeSnappyBlockAsm12B:
  12215. MOVL CX, 12(SP)
  12216. CMPL CX, 8(SP)
  12217. JGE emit_remainder_encodeSnappyBlockAsm12B
  12218. JMP search_loop_encodeSnappyBlockAsm12B
  12219. no_repeat_found_encodeSnappyBlockAsm12B:
  12220. CMPL (DX)(BP*1), SI
  12221. JEQ candidate_match_encodeSnappyBlockAsm12B
  12222. SHRQ $0x08, SI
  12223. MOVL 24(SP)(R9*4), BP
  12224. LEAL 2(CX), R8
  12225. CMPL (DX)(DI*1), SI
  12226. JEQ candidate2_match_encodeSnappyBlockAsm12B
  12227. MOVL R8, 24(SP)(R9*4)
  12228. SHRQ $0x08, SI
  12229. CMPL (DX)(BP*1), SI
  12230. JEQ candidate3_match_encodeSnappyBlockAsm12B
  12231. MOVL 20(SP), CX
  12232. JMP search_loop_encodeSnappyBlockAsm12B
  12233. candidate3_match_encodeSnappyBlockAsm12B:
  12234. ADDL $0x02, CX
  12235. JMP candidate_match_encodeSnappyBlockAsm12B
  12236. candidate2_match_encodeSnappyBlockAsm12B:
  12237. MOVL R8, 24(SP)(R9*4)
  12238. INCL CX
  12239. MOVL DI, BP
  12240. candidate_match_encodeSnappyBlockAsm12B:
  12241. MOVL 12(SP), SI
  12242. TESTL BP, BP
  12243. JZ match_extend_back_end_encodeSnappyBlockAsm12B
  12244. match_extend_back_loop_encodeSnappyBlockAsm12B:
  12245. CMPL CX, SI
  12246. JLE match_extend_back_end_encodeSnappyBlockAsm12B
  12247. MOVB -1(DX)(BP*1), BL
  12248. MOVB -1(DX)(CX*1), DI
  12249. CMPB BL, DI
  12250. JNE match_extend_back_end_encodeSnappyBlockAsm12B
  12251. LEAL -1(CX), CX
  12252. DECL BP
  12253. JZ match_extend_back_end_encodeSnappyBlockAsm12B
  12254. JMP match_extend_back_loop_encodeSnappyBlockAsm12B
  12255. match_extend_back_end_encodeSnappyBlockAsm12B:
  12256. MOVL CX, SI
  12257. SUBL 12(SP), SI
  12258. LEAQ 4(AX)(SI*1), SI
  12259. CMPQ SI, (SP)
  12260. JL match_dst_size_check_encodeSnappyBlockAsm12B
  12261. MOVQ $0x00000000, ret+48(FP)
  12262. RET
  12263. match_dst_size_check_encodeSnappyBlockAsm12B:
  12264. MOVL CX, SI
  12265. MOVL 12(SP), DI
  12266. CMPL DI, SI
  12267. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B
  12268. MOVL SI, R8
  12269. MOVL SI, 12(SP)
  12270. LEAQ (DX)(DI*1), SI
  12271. SUBL DI, R8
  12272. MOVL R8, DI
  12273. SUBL $0x01, DI
  12274. JC emit_literal_done_match_emit_encodeSnappyBlockAsm12B
  12275. CMPL DI, $0x3c
  12276. JLT one_byte_match_emit_encodeSnappyBlockAsm12B
  12277. CMPL DI, $0x00000100
  12278. JLT two_bytes_match_emit_encodeSnappyBlockAsm12B
  12279. CMPL DI, $0x00010000
  12280. JLT three_bytes_match_emit_encodeSnappyBlockAsm12B
  12281. CMPL DI, $0x01000000
  12282. JLT four_bytes_match_emit_encodeSnappyBlockAsm12B
  12283. MOVB $0xfc, (AX)
  12284. MOVL DI, 1(AX)
  12285. ADDQ $0x05, AX
  12286. JMP memmove_match_emit_encodeSnappyBlockAsm12B
  12287. four_bytes_match_emit_encodeSnappyBlockAsm12B:
  12288. MOVL DI, R9
  12289. SHRL $0x10, R9
  12290. MOVB $0xf8, (AX)
  12291. MOVW DI, 1(AX)
  12292. MOVB R9, 3(AX)
  12293. ADDQ $0x04, AX
  12294. JMP memmove_match_emit_encodeSnappyBlockAsm12B
  12295. three_bytes_match_emit_encodeSnappyBlockAsm12B:
  12296. MOVB $0xf4, (AX)
  12297. MOVW DI, 1(AX)
  12298. ADDQ $0x03, AX
  12299. JMP memmove_match_emit_encodeSnappyBlockAsm12B
  12300. two_bytes_match_emit_encodeSnappyBlockAsm12B:
  12301. MOVB $0xf0, (AX)
  12302. MOVB DI, 1(AX)
  12303. ADDQ $0x02, AX
  12304. JMP memmove_match_emit_encodeSnappyBlockAsm12B
  12305. one_byte_match_emit_encodeSnappyBlockAsm12B:
  12306. SHLB $0x02, DI
  12307. MOVB DI, (AX)
  12308. ADDQ $0x01, AX
  12309. memmove_match_emit_encodeSnappyBlockAsm12B:
  12310. LEAQ (AX)(R8*1), DI
  12311. NOP
  12312. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_tail:
  12313. TESTQ R8, R8
  12314. JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12315. CMPQ R8, $0x02
  12316. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2
  12317. CMPQ R8, $0x04
  12318. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3
  12319. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4
  12320. CMPQ R8, $0x08
  12321. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_5through7
  12322. JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8
  12323. CMPQ R8, $0x10
  12324. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_9through16
  12325. CMPQ R8, $0x20
  12326. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
  12327. CMPQ R8, $0x40
  12328. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
  12329. CMPQ R8, $0x80
  12330. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_65through128
  12331. CMPQ R8, $0x00000100
  12332. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_129through256
  12333. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048
  12334. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2:
  12335. MOVB (SI), R9
  12336. MOVB -1(SI)(R8*1), SI
  12337. MOVB R9, (AX)
  12338. MOVB SI, -1(AX)(R8*1)
  12339. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12340. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4:
  12341. MOVL (SI), R9
  12342. MOVL R9, (AX)
  12343. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12344. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3:
  12345. MOVW (SI), R9
  12346. MOVB 2(SI), SI
  12347. MOVW R9, (AX)
  12348. MOVB SI, 2(AX)
  12349. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12350. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_5through7:
  12351. MOVL (SI), R9
  12352. MOVL -4(SI)(R8*1), SI
  12353. MOVL R9, (AX)
  12354. MOVL SI, -4(AX)(R8*1)
  12355. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12356. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8:
  12357. MOVQ (SI), R9
  12358. MOVQ R9, (AX)
  12359. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12360. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_9through16:
  12361. MOVQ (SI), R9
  12362. MOVQ -8(SI)(R8*1), SI
  12363. MOVQ R9, (AX)
  12364. MOVQ SI, -8(AX)(R8*1)
  12365. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12366. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
  12367. MOVOU (SI), X0
  12368. MOVOU -16(SI)(R8*1), X1
  12369. MOVOU X0, (AX)
  12370. MOVOU X1, -16(AX)(R8*1)
  12371. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12372. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
  12373. MOVOU (SI), X0
  12374. MOVOU 16(SI), X1
  12375. MOVOU -32(SI)(R8*1), X2
  12376. MOVOU -16(SI)(R8*1), X3
  12377. MOVOU X0, (AX)
  12378. MOVOU X1, 16(AX)
  12379. MOVOU X2, -32(AX)(R8*1)
  12380. MOVOU X3, -16(AX)(R8*1)
  12381. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12382. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_65through128:
  12383. MOVOU (SI), X0
  12384. MOVOU 16(SI), X1
  12385. MOVOU 32(SI), X2
  12386. MOVOU 48(SI), X3
  12387. MOVOU -64(SI)(R8*1), X12
  12388. MOVOU -48(SI)(R8*1), X13
  12389. MOVOU -32(SI)(R8*1), X14
  12390. MOVOU -16(SI)(R8*1), X15
  12391. MOVOU X0, (AX)
  12392. MOVOU X1, 16(AX)
  12393. MOVOU X2, 32(AX)
  12394. MOVOU X3, 48(AX)
  12395. MOVOU X12, -64(AX)(R8*1)
  12396. MOVOU X13, -48(AX)(R8*1)
  12397. MOVOU X14, -32(AX)(R8*1)
  12398. MOVOU X15, -16(AX)(R8*1)
  12399. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12400. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_129through256:
  12401. MOVOU (SI), X0
  12402. MOVOU 16(SI), X1
  12403. MOVOU 32(SI), X2
  12404. MOVOU 48(SI), X3
  12405. MOVOU 64(SI), X4
  12406. MOVOU 80(SI), X5
  12407. MOVOU 96(SI), X6
  12408. MOVOU 112(SI), X7
  12409. MOVOU -128(SI)(R8*1), X8
  12410. MOVOU -112(SI)(R8*1), X9
  12411. MOVOU -96(SI)(R8*1), X10
  12412. MOVOU -80(SI)(R8*1), X11
  12413. MOVOU -64(SI)(R8*1), X12
  12414. MOVOU -48(SI)(R8*1), X13
  12415. MOVOU -32(SI)(R8*1), X14
  12416. MOVOU -16(SI)(R8*1), X15
  12417. MOVOU X0, (AX)
  12418. MOVOU X1, 16(AX)
  12419. MOVOU X2, 32(AX)
  12420. MOVOU X3, 48(AX)
  12421. MOVOU X4, 64(AX)
  12422. MOVOU X5, 80(AX)
  12423. MOVOU X6, 96(AX)
  12424. MOVOU X7, 112(AX)
  12425. MOVOU X8, -128(AX)(R8*1)
  12426. MOVOU X9, -112(AX)(R8*1)
  12427. MOVOU X10, -96(AX)(R8*1)
  12428. MOVOU X11, -80(AX)(R8*1)
  12429. MOVOU X12, -64(AX)(R8*1)
  12430. MOVOU X13, -48(AX)(R8*1)
  12431. MOVOU X14, -32(AX)(R8*1)
  12432. MOVOU X15, -16(AX)(R8*1)
  12433. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  12434. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048:
  12435. LEAQ -256(R8), R8
  12436. MOVOU (SI), X0
  12437. MOVOU 16(SI), X1
  12438. MOVOU 32(SI), X2
  12439. MOVOU 48(SI), X3
  12440. MOVOU 64(SI), X4
  12441. MOVOU 80(SI), X5
  12442. MOVOU 96(SI), X6
  12443. MOVOU 112(SI), X7
  12444. MOVOU 128(SI), X8
  12445. MOVOU 144(SI), X9
  12446. MOVOU 160(SI), X10
  12447. MOVOU 176(SI), X11
  12448. MOVOU 192(SI), X12
  12449. MOVOU 208(SI), X13
  12450. MOVOU 224(SI), X14
  12451. MOVOU 240(SI), X15
  12452. MOVOU X0, (AX)
  12453. MOVOU X1, 16(AX)
  12454. MOVOU X2, 32(AX)
  12455. MOVOU X3, 48(AX)
  12456. MOVOU X4, 64(AX)
  12457. MOVOU X5, 80(AX)
  12458. MOVOU X6, 96(AX)
  12459. MOVOU X7, 112(AX)
  12460. MOVOU X8, 128(AX)
  12461. MOVOU X9, 144(AX)
  12462. MOVOU X10, 160(AX)
  12463. MOVOU X11, 176(AX)
  12464. MOVOU X12, 192(AX)
  12465. MOVOU X13, 208(AX)
  12466. MOVOU X14, 224(AX)
  12467. MOVOU X15, 240(AX)
  12468. CMPQ R8, $0x00000100
  12469. LEAQ 256(SI), SI
  12470. LEAQ 256(AX), AX
  12471. JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048
  12472. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_tail
  12473. memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
  12474. MOVQ DI, AX
  12475. emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
  12476. match_nolit_loop_encodeSnappyBlockAsm12B:
  12477. MOVL CX, SI
  12478. SUBL BP, SI
  12479. MOVL SI, 16(SP)
  12480. ADDL $0x04, CX
  12481. ADDL $0x04, BP
  12482. MOVQ src_len+32(FP), SI
  12483. SUBL CX, SI
  12484. LEAQ (DX)(CX*1), DI
  12485. LEAQ (DX)(BP*1), BP
  12486. XORL R9, R9
  12487. CMPL SI, $0x08
  12488. JL matchlen_single_match_nolit_encodeSnappyBlockAsm12B
  12489. matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B:
  12490. MOVQ (DI)(R9*1), R8
  12491. XORQ (BP)(R9*1), R8
  12492. TESTQ R8, R8
  12493. JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B
  12494. BSFQ R8, R8
  12495. SARQ $0x03, R8
  12496. LEAL (R9)(R8*1), R9
  12497. JMP match_nolit_end_encodeSnappyBlockAsm12B
  12498. matchlen_loop_match_nolit_encodeSnappyBlockAsm12B:
  12499. LEAL -8(SI), SI
  12500. LEAL 8(R9), R9
  12501. CMPL SI, $0x08
  12502. JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B
  12503. matchlen_single_match_nolit_encodeSnappyBlockAsm12B:
  12504. TESTL SI, SI
  12505. JZ match_nolit_end_encodeSnappyBlockAsm12B
  12506. matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B:
  12507. MOVB (DI)(R9*1), R8
  12508. CMPB (BP)(R9*1), R8
  12509. JNE match_nolit_end_encodeSnappyBlockAsm12B
  12510. LEAL 1(R9), R9
  12511. DECL SI
  12512. JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B
  12513. match_nolit_end_encodeSnappyBlockAsm12B:
  12514. ADDL R9, CX
  12515. MOVL 16(SP), BP
  12516. ADDL $0x04, R9
  12517. CMPL BP, $0x00010000
  12518. JL two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
  12519. four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12B:
  12520. CMPL R9, $0x40
  12521. JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B
  12522. MOVB $0xff, (AX)
  12523. MOVL BP, 1(AX)
  12524. LEAL -64(R9), R9
  12525. ADDQ $0x05, AX
  12526. CMPL R9, $0x04
  12527. JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B
  12528. JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12B
  12529. four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B:
  12530. TESTL R9, R9
  12531. JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
  12532. MOVB $0x03, BL
  12533. LEAL -4(BX)(R9*4), R9
  12534. MOVB R9, (AX)
  12535. MOVL BP, 1(AX)
  12536. ADDQ $0x05, AX
  12537. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
  12538. two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
  12539. CMPL R9, $0x40
  12540. JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
  12541. MOVB $0xee, (AX)
  12542. MOVW BP, 1(AX)
  12543. LEAL -60(R9), R9
  12544. ADDQ $0x03, AX
  12545. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
  12546. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
  12547. CMPL R9, $0x0c
  12548. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
  12549. CMPL BP, $0x00000800
  12550. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
  12551. MOVB $0x01, BL
  12552. LEAL -16(BX)(R9*4), R9
  12553. MOVB BP, 1(AX)
  12554. SHRL $0x08, BP
  12555. SHLL $0x05, BP
  12556. ORL BP, R9
  12557. MOVB R9, (AX)
  12558. ADDQ $0x02, AX
  12559. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
  12560. emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
  12561. MOVB $0x02, BL
  12562. LEAL -4(BX)(R9*4), R9
  12563. MOVB R9, (AX)
  12564. MOVW BP, 1(AX)
  12565. ADDQ $0x03, AX
  12566. match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
  12567. MOVL CX, 12(SP)
  12568. CMPL CX, 8(SP)
  12569. JGE emit_remainder_encodeSnappyBlockAsm12B
  12570. CMPQ AX, (SP)
  12571. JL match_nolit_dst_ok_encodeSnappyBlockAsm12B
  12572. MOVQ $0x00000000, ret+48(FP)
  12573. RET
  12574. match_nolit_dst_ok_encodeSnappyBlockAsm12B:
  12575. MOVQ -2(DX)(CX*1), SI
  12576. MOVQ $0x000000cf1bbcdcbb, BP
  12577. MOVQ SI, DI
  12578. SHRQ $0x10, SI
  12579. MOVQ SI, R8
  12580. SHLQ $0x18, DI
  12581. IMULQ BP, DI
  12582. SHRQ $0x34, DI
  12583. SHLQ $0x18, R8
  12584. IMULQ BP, R8
  12585. SHRQ $0x34, R8
  12586. LEAL -2(CX), R9
  12587. MOVL 24(SP)(R8*4), BP
  12588. MOVL R9, 24(SP)(DI*4)
  12589. MOVL CX, 24(SP)(R8*4)
  12590. CMPL (DX)(BP*1), SI
  12591. JEQ match_nolit_loop_encodeSnappyBlockAsm12B
  12592. INCL CX
  12593. JMP search_loop_encodeSnappyBlockAsm12B
  12594. emit_remainder_encodeSnappyBlockAsm12B:
  12595. MOVQ src_len+32(FP), CX
  12596. SUBL 12(SP), CX
  12597. LEAQ 4(AX)(CX*1), CX
  12598. CMPQ CX, (SP)
  12599. JL emit_remainder_ok_encodeSnappyBlockAsm12B
  12600. MOVQ $0x00000000, ret+48(FP)
  12601. RET
  12602. emit_remainder_ok_encodeSnappyBlockAsm12B:
  12603. MOVQ src_len+32(FP), CX
  12604. MOVL 12(SP), BX
  12605. CMPL BX, CX
  12606. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
  12607. MOVL CX, BP
  12608. MOVL CX, 12(SP)
  12609. LEAQ (DX)(BX*1), CX
  12610. SUBL BX, BP
  12611. MOVL BP, DX
  12612. SUBL $0x01, DX
  12613. JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
  12614. CMPL DX, $0x3c
  12615. JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B
  12616. CMPL DX, $0x00000100
  12617. JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B
  12618. CMPL DX, $0x00010000
  12619. JLT three_bytes_emit_remainder_encodeSnappyBlockAsm12B
  12620. CMPL DX, $0x01000000
  12621. JLT four_bytes_emit_remainder_encodeSnappyBlockAsm12B
  12622. MOVB $0xfc, (AX)
  12623. MOVL DX, 1(AX)
  12624. ADDQ $0x05, AX
  12625. JMP memmove_emit_remainder_encodeSnappyBlockAsm12B
  12626. four_bytes_emit_remainder_encodeSnappyBlockAsm12B:
  12627. MOVL DX, BX
  12628. SHRL $0x10, BX
  12629. MOVB $0xf8, (AX)
  12630. MOVW DX, 1(AX)
  12631. MOVB BL, 3(AX)
  12632. ADDQ $0x04, AX
  12633. JMP memmove_emit_remainder_encodeSnappyBlockAsm12B
  12634. three_bytes_emit_remainder_encodeSnappyBlockAsm12B:
  12635. MOVB $0xf4, (AX)
  12636. MOVW DX, 1(AX)
  12637. ADDQ $0x03, AX
  12638. JMP memmove_emit_remainder_encodeSnappyBlockAsm12B
  12639. two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
  12640. MOVB $0xf0, (AX)
  12641. MOVB DL, 1(AX)
  12642. ADDQ $0x02, AX
  12643. JMP memmove_emit_remainder_encodeSnappyBlockAsm12B
  12644. one_byte_emit_remainder_encodeSnappyBlockAsm12B:
  12645. SHLB $0x02, DL
  12646. MOVB DL, (AX)
  12647. ADDQ $0x01, AX
  12648. memmove_emit_remainder_encodeSnappyBlockAsm12B:
  12649. LEAQ (AX)(BP*1), DX
  12650. MOVL BP, BX
  12651. NOP
  12652. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_tail:
  12653. TESTQ BX, BX
  12654. JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12655. CMPQ BX, $0x02
  12656. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2
  12657. CMPQ BX, $0x04
  12658. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3
  12659. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4
  12660. CMPQ BX, $0x08
  12661. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_5through7
  12662. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8
  12663. CMPQ BX, $0x10
  12664. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_9through16
  12665. CMPQ BX, $0x20
  12666. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32
  12667. CMPQ BX, $0x40
  12668. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64
  12669. CMPQ BX, $0x80
  12670. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_65through128
  12671. CMPQ BX, $0x00000100
  12672. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_129through256
  12673. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_256through2048
  12674. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2:
  12675. MOVB (CX), BP
  12676. MOVB -1(CX)(BX*1), CL
  12677. MOVB BP, (AX)
  12678. MOVB CL, -1(AX)(BX*1)
  12679. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12680. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4:
  12681. MOVL (CX), BP
  12682. MOVL BP, (AX)
  12683. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12684. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3:
  12685. MOVW (CX), BP
  12686. MOVB 2(CX), CL
  12687. MOVW BP, (AX)
  12688. MOVB CL, 2(AX)
  12689. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12690. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_5through7:
  12691. MOVL (CX), BP
  12692. MOVL -4(CX)(BX*1), CX
  12693. MOVL BP, (AX)
  12694. MOVL CX, -4(AX)(BX*1)
  12695. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12696. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8:
  12697. MOVQ (CX), BP
  12698. MOVQ BP, (AX)
  12699. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12700. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_9through16:
  12701. MOVQ (CX), BP
  12702. MOVQ -8(CX)(BX*1), CX
  12703. MOVQ BP, (AX)
  12704. MOVQ CX, -8(AX)(BX*1)
  12705. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12706. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
  12707. MOVOU (CX), X0
  12708. MOVOU -16(CX)(BX*1), X1
  12709. MOVOU X0, (AX)
  12710. MOVOU X1, -16(AX)(BX*1)
  12711. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12712. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
  12713. MOVOU (CX), X0
  12714. MOVOU 16(CX), X1
  12715. MOVOU -32(CX)(BX*1), X2
  12716. MOVOU -16(CX)(BX*1), X3
  12717. MOVOU X0, (AX)
  12718. MOVOU X1, 16(AX)
  12719. MOVOU X2, -32(AX)(BX*1)
  12720. MOVOU X3, -16(AX)(BX*1)
  12721. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12722. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_65through128:
  12723. MOVOU (CX), X0
  12724. MOVOU 16(CX), X1
  12725. MOVOU 32(CX), X2
  12726. MOVOU 48(CX), X3
  12727. MOVOU -64(CX)(BX*1), X12
  12728. MOVOU -48(CX)(BX*1), X13
  12729. MOVOU -32(CX)(BX*1), X14
  12730. MOVOU -16(CX)(BX*1), X15
  12731. MOVOU X0, (AX)
  12732. MOVOU X1, 16(AX)
  12733. MOVOU X2, 32(AX)
  12734. MOVOU X3, 48(AX)
  12735. MOVOU X12, -64(AX)(BX*1)
  12736. MOVOU X13, -48(AX)(BX*1)
  12737. MOVOU X14, -32(AX)(BX*1)
  12738. MOVOU X15, -16(AX)(BX*1)
  12739. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12740. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_129through256:
  12741. MOVOU (CX), X0
  12742. MOVOU 16(CX), X1
  12743. MOVOU 32(CX), X2
  12744. MOVOU 48(CX), X3
  12745. MOVOU 64(CX), X4
  12746. MOVOU 80(CX), X5
  12747. MOVOU 96(CX), X6
  12748. MOVOU 112(CX), X7
  12749. MOVOU -128(CX)(BX*1), X8
  12750. MOVOU -112(CX)(BX*1), X9
  12751. MOVOU -96(CX)(BX*1), X10
  12752. MOVOU -80(CX)(BX*1), X11
  12753. MOVOU -64(CX)(BX*1), X12
  12754. MOVOU -48(CX)(BX*1), X13
  12755. MOVOU -32(CX)(BX*1), X14
  12756. MOVOU -16(CX)(BX*1), X15
  12757. MOVOU X0, (AX)
  12758. MOVOU X1, 16(AX)
  12759. MOVOU X2, 32(AX)
  12760. MOVOU X3, 48(AX)
  12761. MOVOU X4, 64(AX)
  12762. MOVOU X5, 80(AX)
  12763. MOVOU X6, 96(AX)
  12764. MOVOU X7, 112(AX)
  12765. MOVOU X8, -128(AX)(BX*1)
  12766. MOVOU X9, -112(AX)(BX*1)
  12767. MOVOU X10, -96(AX)(BX*1)
  12768. MOVOU X11, -80(AX)(BX*1)
  12769. MOVOU X12, -64(AX)(BX*1)
  12770. MOVOU X13, -48(AX)(BX*1)
  12771. MOVOU X14, -32(AX)(BX*1)
  12772. MOVOU X15, -16(AX)(BX*1)
  12773. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  12774. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_256through2048:
  12775. LEAQ -256(BX), BX
  12776. MOVOU (CX), X0
  12777. MOVOU 16(CX), X1
  12778. MOVOU 32(CX), X2
  12779. MOVOU 48(CX), X3
  12780. MOVOU 64(CX), X4
  12781. MOVOU 80(CX), X5
  12782. MOVOU 96(CX), X6
  12783. MOVOU 112(CX), X7
  12784. MOVOU 128(CX), X8
  12785. MOVOU 144(CX), X9
  12786. MOVOU 160(CX), X10
  12787. MOVOU 176(CX), X11
  12788. MOVOU 192(CX), X12
  12789. MOVOU 208(CX), X13
  12790. MOVOU 224(CX), X14
  12791. MOVOU 240(CX), X15
  12792. MOVOU X0, (AX)
  12793. MOVOU X1, 16(AX)
  12794. MOVOU X2, 32(AX)
  12795. MOVOU X3, 48(AX)
  12796. MOVOU X4, 64(AX)
  12797. MOVOU X5, 80(AX)
  12798. MOVOU X6, 96(AX)
  12799. MOVOU X7, 112(AX)
  12800. MOVOU X8, 128(AX)
  12801. MOVOU X9, 144(AX)
  12802. MOVOU X10, 160(AX)
  12803. MOVOU X11, 176(AX)
  12804. MOVOU X12, 192(AX)
  12805. MOVOU X13, 208(AX)
  12806. MOVOU X14, 224(AX)
  12807. MOVOU X15, 240(AX)
  12808. CMPQ BX, $0x00000100
  12809. LEAQ 256(CX), CX
  12810. LEAQ 256(AX), AX
  12811. JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_256through2048
  12812. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_tail
  12813. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
  12814. MOVQ DX, AX
  12815. emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
  12816. MOVQ dst_base+0(FP), CX
  12817. SUBQ CX, AX
  12818. MOVQ AX, ret+48(FP)
  12819. RET
  12820. // func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
  12821. // Requires: SSE2
  12822. TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
  12823. MOVQ dst_base+0(FP), AX
  12824. MOVQ $0x00000020, CX
  12825. LEAQ 24(SP), DX
  12826. PXOR X0, X0
  12827. zero_loop_encodeSnappyBlockAsm10B:
  12828. MOVOU X0, (DX)
  12829. MOVOU X0, 16(DX)
  12830. MOVOU X0, 32(DX)
  12831. MOVOU X0, 48(DX)
  12832. MOVOU X0, 64(DX)
  12833. MOVOU X0, 80(DX)
  12834. MOVOU X0, 96(DX)
  12835. MOVOU X0, 112(DX)
  12836. ADDQ $0x80, DX
  12837. DECQ CX
  12838. JNZ zero_loop_encodeSnappyBlockAsm10B
  12839. MOVL $0x00000000, 12(SP)
  12840. MOVQ src_len+32(FP), CX
  12841. LEAQ -5(CX), DX
  12842. LEAQ -8(CX), BP
  12843. MOVL BP, 8(SP)
  12844. SHRQ $0x05, CX
  12845. SUBL CX, DX
  12846. LEAQ (AX)(DX*1), DX
  12847. MOVQ DX, (SP)
  12848. MOVL $0x00000001, CX
  12849. MOVL CX, 16(SP)
  12850. MOVQ src_base+24(FP), DX
  12851. search_loop_encodeSnappyBlockAsm10B:
  12852. MOVQ (DX)(CX*1), SI
  12853. MOVL CX, BP
  12854. SUBL 12(SP), BP
  12855. SHRL $0x05, BP
  12856. LEAL 4(CX)(BP*1), BP
  12857. MOVL 8(SP), DI
  12858. CMPL BP, DI
  12859. JGT emit_remainder_encodeSnappyBlockAsm10B
  12860. MOVL BP, 20(SP)
  12861. MOVQ $0x000000cf1bbcdcbb, R8
  12862. MOVQ SI, R9
  12863. MOVQ SI, R10
  12864. SHRQ $0x08, R10
  12865. SHLQ $0x18, R9
  12866. IMULQ R8, R9
  12867. SHRQ $0x36, R9
  12868. SHLQ $0x18, R10
  12869. IMULQ R8, R10
  12870. SHRQ $0x36, R10
  12871. MOVL 24(SP)(R9*4), BP
  12872. MOVL 24(SP)(R10*4), DI
  12873. MOVL CX, 24(SP)(R9*4)
  12874. LEAL 1(CX), R9
  12875. MOVL R9, 24(SP)(R10*4)
  12876. MOVQ SI, R9
  12877. SHRQ $0x10, R9
  12878. SHLQ $0x18, R9
  12879. IMULQ R8, R9
  12880. SHRQ $0x36, R9
  12881. MOVL CX, R8
  12882. SUBL 16(SP), R8
  12883. MOVL 1(DX)(R8*1), R10
  12884. MOVQ SI, R8
  12885. SHRQ $0x08, R8
  12886. CMPL R8, R10
  12887. JNE no_repeat_found_encodeSnappyBlockAsm10B
  12888. LEAL 1(CX), SI
  12889. MOVL 12(SP), BP
  12890. MOVL SI, DI
  12891. SUBL 16(SP), DI
  12892. JZ repeat_extend_back_end_encodeSnappyBlockAsm10B
  12893. repeat_extend_back_loop_encodeSnappyBlockAsm10B:
  12894. CMPL SI, BP
  12895. JLE repeat_extend_back_end_encodeSnappyBlockAsm10B
  12896. MOVB -1(DX)(DI*1), BL
  12897. MOVB -1(DX)(SI*1), R8
  12898. CMPB BL, R8
  12899. JNE repeat_extend_back_end_encodeSnappyBlockAsm10B
  12900. LEAL -1(SI), SI
  12901. DECL DI
  12902. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B
  12903. repeat_extend_back_end_encodeSnappyBlockAsm10B:
  12904. MOVL 12(SP), BP
  12905. CMPL BP, SI
  12906. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
  12907. MOVL SI, DI
  12908. MOVL SI, 12(SP)
  12909. LEAQ (DX)(BP*1), R8
  12910. SUBL BP, DI
  12911. MOVL DI, BP
  12912. SUBL $0x01, BP
  12913. JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
  12914. CMPL BP, $0x3c
  12915. JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B
  12916. CMPL BP, $0x00000100
  12917. JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10B
  12918. CMPL BP, $0x00010000
  12919. JLT three_bytes_repeat_emit_encodeSnappyBlockAsm10B
  12920. CMPL BP, $0x01000000
  12921. JLT four_bytes_repeat_emit_encodeSnappyBlockAsm10B
  12922. MOVB $0xfc, (AX)
  12923. MOVL BP, 1(AX)
  12924. ADDQ $0x05, AX
  12925. JMP memmove_repeat_emit_encodeSnappyBlockAsm10B
  12926. four_bytes_repeat_emit_encodeSnappyBlockAsm10B:
  12927. MOVL BP, R9
  12928. SHRL $0x10, R9
  12929. MOVB $0xf8, (AX)
  12930. MOVW BP, 1(AX)
  12931. MOVB R9, 3(AX)
  12932. ADDQ $0x04, AX
  12933. JMP memmove_repeat_emit_encodeSnappyBlockAsm10B
  12934. three_bytes_repeat_emit_encodeSnappyBlockAsm10B:
  12935. MOVB $0xf4, (AX)
  12936. MOVW BP, 1(AX)
  12937. ADDQ $0x03, AX
  12938. JMP memmove_repeat_emit_encodeSnappyBlockAsm10B
  12939. two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
  12940. MOVB $0xf0, (AX)
  12941. MOVB BP, 1(AX)
  12942. ADDQ $0x02, AX
  12943. JMP memmove_repeat_emit_encodeSnappyBlockAsm10B
  12944. one_byte_repeat_emit_encodeSnappyBlockAsm10B:
  12945. SHLB $0x02, BP
  12946. MOVB BP, (AX)
  12947. ADDQ $0x01, AX
  12948. memmove_repeat_emit_encodeSnappyBlockAsm10B:
  12949. LEAQ (AX)(DI*1), BP
  12950. NOP
  12951. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_tail:
  12952. TESTQ DI, DI
  12953. JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  12954. CMPQ DI, $0x02
  12955. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2
  12956. CMPQ DI, $0x04
  12957. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3
  12958. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4
  12959. CMPQ DI, $0x08
  12960. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_5through7
  12961. JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
  12962. CMPQ DI, $0x10
  12963. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_9through16
  12964. CMPQ DI, $0x20
  12965. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
  12966. CMPQ DI, $0x40
  12967. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
  12968. CMPQ DI, $0x80
  12969. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_65through128
  12970. CMPQ DI, $0x00000100
  12971. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_129through256
  12972. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048
  12973. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2:
  12974. MOVB (R8), R9
  12975. MOVB -1(R8)(DI*1), R8
  12976. MOVB R9, (AX)
  12977. MOVB R8, -1(AX)(DI*1)
  12978. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  12979. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4:
  12980. MOVL (R8), R9
  12981. MOVL R9, (AX)
  12982. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  12983. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3:
  12984. MOVW (R8), R9
  12985. MOVB 2(R8), R8
  12986. MOVW R9, (AX)
  12987. MOVB R8, 2(AX)
  12988. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  12989. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_5through7:
  12990. MOVL (R8), R9
  12991. MOVL -4(R8)(DI*1), R8
  12992. MOVL R9, (AX)
  12993. MOVL R8, -4(AX)(DI*1)
  12994. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  12995. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
  12996. MOVQ (R8), R9
  12997. MOVQ R9, (AX)
  12998. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  12999. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_9through16:
  13000. MOVQ (R8), R9
  13001. MOVQ -8(R8)(DI*1), R8
  13002. MOVQ R9, (AX)
  13003. MOVQ R8, -8(AX)(DI*1)
  13004. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  13005. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
  13006. MOVOU (R8), X0
  13007. MOVOU -16(R8)(DI*1), X1
  13008. MOVOU X0, (AX)
  13009. MOVOU X1, -16(AX)(DI*1)
  13010. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  13011. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
  13012. MOVOU (R8), X0
  13013. MOVOU 16(R8), X1
  13014. MOVOU -32(R8)(DI*1), X2
  13015. MOVOU -16(R8)(DI*1), X3
  13016. MOVOU X0, (AX)
  13017. MOVOU X1, 16(AX)
  13018. MOVOU X2, -32(AX)(DI*1)
  13019. MOVOU X3, -16(AX)(DI*1)
  13020. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  13021. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_65through128:
  13022. MOVOU (R8), X0
  13023. MOVOU 16(R8), X1
  13024. MOVOU 32(R8), X2
  13025. MOVOU 48(R8), X3
  13026. MOVOU -64(R8)(DI*1), X12
  13027. MOVOU -48(R8)(DI*1), X13
  13028. MOVOU -32(R8)(DI*1), X14
  13029. MOVOU -16(R8)(DI*1), X15
  13030. MOVOU X0, (AX)
  13031. MOVOU X1, 16(AX)
  13032. MOVOU X2, 32(AX)
  13033. MOVOU X3, 48(AX)
  13034. MOVOU X12, -64(AX)(DI*1)
  13035. MOVOU X13, -48(AX)(DI*1)
  13036. MOVOU X14, -32(AX)(DI*1)
  13037. MOVOU X15, -16(AX)(DI*1)
  13038. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  13039. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_129through256:
  13040. MOVOU (R8), X0
  13041. MOVOU 16(R8), X1
  13042. MOVOU 32(R8), X2
  13043. MOVOU 48(R8), X3
  13044. MOVOU 64(R8), X4
  13045. MOVOU 80(R8), X5
  13046. MOVOU 96(R8), X6
  13047. MOVOU 112(R8), X7
  13048. MOVOU -128(R8)(DI*1), X8
  13049. MOVOU -112(R8)(DI*1), X9
  13050. MOVOU -96(R8)(DI*1), X10
  13051. MOVOU -80(R8)(DI*1), X11
  13052. MOVOU -64(R8)(DI*1), X12
  13053. MOVOU -48(R8)(DI*1), X13
  13054. MOVOU -32(R8)(DI*1), X14
  13055. MOVOU -16(R8)(DI*1), X15
  13056. MOVOU X0, (AX)
  13057. MOVOU X1, 16(AX)
  13058. MOVOU X2, 32(AX)
  13059. MOVOU X3, 48(AX)
  13060. MOVOU X4, 64(AX)
  13061. MOVOU X5, 80(AX)
  13062. MOVOU X6, 96(AX)
  13063. MOVOU X7, 112(AX)
  13064. MOVOU X8, -128(AX)(DI*1)
  13065. MOVOU X9, -112(AX)(DI*1)
  13066. MOVOU X10, -96(AX)(DI*1)
  13067. MOVOU X11, -80(AX)(DI*1)
  13068. MOVOU X12, -64(AX)(DI*1)
  13069. MOVOU X13, -48(AX)(DI*1)
  13070. MOVOU X14, -32(AX)(DI*1)
  13071. MOVOU X15, -16(AX)(DI*1)
  13072. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  13073. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048:
  13074. LEAQ -256(DI), DI
  13075. MOVOU (R8), X0
  13076. MOVOU 16(R8), X1
  13077. MOVOU 32(R8), X2
  13078. MOVOU 48(R8), X3
  13079. MOVOU 64(R8), X4
  13080. MOVOU 80(R8), X5
  13081. MOVOU 96(R8), X6
  13082. MOVOU 112(R8), X7
  13083. MOVOU 128(R8), X8
  13084. MOVOU 144(R8), X9
  13085. MOVOU 160(R8), X10
  13086. MOVOU 176(R8), X11
  13087. MOVOU 192(R8), X12
  13088. MOVOU 208(R8), X13
  13089. MOVOU 224(R8), X14
  13090. MOVOU 240(R8), X15
  13091. MOVOU X0, (AX)
  13092. MOVOU X1, 16(AX)
  13093. MOVOU X2, 32(AX)
  13094. MOVOU X3, 48(AX)
  13095. MOVOU X4, 64(AX)
  13096. MOVOU X5, 80(AX)
  13097. MOVOU X6, 96(AX)
  13098. MOVOU X7, 112(AX)
  13099. MOVOU X8, 128(AX)
  13100. MOVOU X9, 144(AX)
  13101. MOVOU X10, 160(AX)
  13102. MOVOU X11, 176(AX)
  13103. MOVOU X12, 192(AX)
  13104. MOVOU X13, 208(AX)
  13105. MOVOU X14, 224(AX)
  13106. MOVOU X15, 240(AX)
  13107. CMPQ DI, $0x00000100
  13108. LEAQ 256(R8), R8
  13109. LEAQ 256(AX), AX
  13110. JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048
  13111. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_tail
  13112. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
  13113. MOVQ BP, AX
  13114. emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
  13115. ADDL $0x05, CX
  13116. MOVL CX, BP
  13117. SUBL 16(SP), BP
  13118. MOVQ src_len+32(FP), DI
  13119. SUBL CX, DI
  13120. LEAQ (DX)(CX*1), R8
  13121. LEAQ (DX)(BP*1), BP
  13122. XORL R10, R10
  13123. CMPL DI, $0x08
  13124. JL matchlen_single_repeat_extend
  13125. matchlen_loopback_repeat_extend:
  13126. MOVQ (R8)(R10*1), R9
  13127. XORQ (BP)(R10*1), R9
  13128. TESTQ R9, R9
  13129. JZ matchlen_loop_repeat_extend
  13130. BSFQ R9, R9
  13131. SARQ $0x03, R9
  13132. LEAL (R10)(R9*1), R10
  13133. JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B
  13134. matchlen_loop_repeat_extend:
  13135. LEAL -8(DI), DI
  13136. LEAL 8(R10), R10
  13137. CMPL DI, $0x08
  13138. JGE matchlen_loopback_repeat_extend
  13139. matchlen_single_repeat_extend:
  13140. TESTL DI, DI
  13141. JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B
  13142. matchlen_single_loopback_repeat_extend:
  13143. MOVB (R8)(R10*1), R9
  13144. CMPB (BP)(R10*1), R9
  13145. JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B
  13146. LEAL 1(R10), R10
  13147. DECL DI
  13148. JNZ matchlen_single_loopback_repeat_extend
  13149. repeat_extend_forward_end_encodeSnappyBlockAsm10B:
  13150. ADDL R10, CX
  13151. MOVL CX, BP
  13152. SUBL SI, BP
  13153. MOVL 16(SP), SI
  13154. CMPL SI, $0x00010000
  13155. JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
  13156. four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10B:
  13157. CMPL BP, $0x40
  13158. JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B
  13159. MOVB $0xff, (AX)
  13160. MOVL SI, 1(AX)
  13161. LEAL -64(BP), BP
  13162. ADDQ $0x05, AX
  13163. CMPL BP, $0x04
  13164. JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B
  13165. JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10B
  13166. four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B:
  13167. TESTL BP, BP
  13168. JZ repeat_end_emit_encodeSnappyBlockAsm10B
  13169. MOVB $0x03, BL
  13170. LEAL -4(BX)(BP*4), BP
  13171. MOVB BP, (AX)
  13172. MOVL SI, 1(AX)
  13173. ADDQ $0x05, AX
  13174. JMP repeat_end_emit_encodeSnappyBlockAsm10B
  13175. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
  13176. CMPL BP, $0x40
  13177. JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
  13178. MOVB $0xee, (AX)
  13179. MOVW SI, 1(AX)
  13180. LEAL -60(BP), BP
  13181. ADDQ $0x03, AX
  13182. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
  13183. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
  13184. CMPL BP, $0x0c
  13185. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
  13186. CMPL SI, $0x00000800
  13187. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
  13188. MOVB $0x01, BL
  13189. LEAL -16(BX)(BP*4), BP
  13190. MOVB SI, 1(AX)
  13191. SHRL $0x08, SI
  13192. SHLL $0x05, SI
  13193. ORL SI, BP
  13194. MOVB BP, (AX)
  13195. ADDQ $0x02, AX
  13196. JMP repeat_end_emit_encodeSnappyBlockAsm10B
  13197. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
  13198. MOVB $0x02, BL
  13199. LEAL -4(BX)(BP*4), BP
  13200. MOVB BP, (AX)
  13201. MOVW SI, 1(AX)
  13202. ADDQ $0x03, AX
  13203. repeat_end_emit_encodeSnappyBlockAsm10B:
  13204. MOVL CX, 12(SP)
  13205. CMPL CX, 8(SP)
  13206. JGE emit_remainder_encodeSnappyBlockAsm10B
  13207. JMP search_loop_encodeSnappyBlockAsm10B
  13208. no_repeat_found_encodeSnappyBlockAsm10B:
  13209. CMPL (DX)(BP*1), SI
  13210. JEQ candidate_match_encodeSnappyBlockAsm10B
  13211. SHRQ $0x08, SI
  13212. MOVL 24(SP)(R9*4), BP
  13213. LEAL 2(CX), R8
  13214. CMPL (DX)(DI*1), SI
  13215. JEQ candidate2_match_encodeSnappyBlockAsm10B
  13216. MOVL R8, 24(SP)(R9*4)
  13217. SHRQ $0x08, SI
  13218. CMPL (DX)(BP*1), SI
  13219. JEQ candidate3_match_encodeSnappyBlockAsm10B
  13220. MOVL 20(SP), CX
  13221. JMP search_loop_encodeSnappyBlockAsm10B
  13222. candidate3_match_encodeSnappyBlockAsm10B:
  13223. ADDL $0x02, CX
  13224. JMP candidate_match_encodeSnappyBlockAsm10B
  13225. candidate2_match_encodeSnappyBlockAsm10B:
  13226. MOVL R8, 24(SP)(R9*4)
  13227. INCL CX
  13228. MOVL DI, BP
  13229. candidate_match_encodeSnappyBlockAsm10B:
  13230. MOVL 12(SP), SI
  13231. TESTL BP, BP
  13232. JZ match_extend_back_end_encodeSnappyBlockAsm10B
  13233. match_extend_back_loop_encodeSnappyBlockAsm10B:
  13234. CMPL CX, SI
  13235. JLE match_extend_back_end_encodeSnappyBlockAsm10B
  13236. MOVB -1(DX)(BP*1), BL
  13237. MOVB -1(DX)(CX*1), DI
  13238. CMPB BL, DI
  13239. JNE match_extend_back_end_encodeSnappyBlockAsm10B
  13240. LEAL -1(CX), CX
  13241. DECL BP
  13242. JZ match_extend_back_end_encodeSnappyBlockAsm10B
  13243. JMP match_extend_back_loop_encodeSnappyBlockAsm10B
  13244. match_extend_back_end_encodeSnappyBlockAsm10B:
  13245. MOVL CX, SI
  13246. SUBL 12(SP), SI
  13247. LEAQ 4(AX)(SI*1), SI
  13248. CMPQ SI, (SP)
  13249. JL match_dst_size_check_encodeSnappyBlockAsm10B
  13250. MOVQ $0x00000000, ret+48(FP)
  13251. RET
  13252. match_dst_size_check_encodeSnappyBlockAsm10B:
  13253. MOVL CX, SI
  13254. MOVL 12(SP), DI
  13255. CMPL DI, SI
  13256. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B
  13257. MOVL SI, R8
  13258. MOVL SI, 12(SP)
  13259. LEAQ (DX)(DI*1), SI
  13260. SUBL DI, R8
  13261. MOVL R8, DI
  13262. SUBL $0x01, DI
  13263. JC emit_literal_done_match_emit_encodeSnappyBlockAsm10B
  13264. CMPL DI, $0x3c
  13265. JLT one_byte_match_emit_encodeSnappyBlockAsm10B
  13266. CMPL DI, $0x00000100
  13267. JLT two_bytes_match_emit_encodeSnappyBlockAsm10B
  13268. CMPL DI, $0x00010000
  13269. JLT three_bytes_match_emit_encodeSnappyBlockAsm10B
  13270. CMPL DI, $0x01000000
  13271. JLT four_bytes_match_emit_encodeSnappyBlockAsm10B
  13272. MOVB $0xfc, (AX)
  13273. MOVL DI, 1(AX)
  13274. ADDQ $0x05, AX
  13275. JMP memmove_match_emit_encodeSnappyBlockAsm10B
  13276. four_bytes_match_emit_encodeSnappyBlockAsm10B:
  13277. MOVL DI, R9
  13278. SHRL $0x10, R9
  13279. MOVB $0xf8, (AX)
  13280. MOVW DI, 1(AX)
  13281. MOVB R9, 3(AX)
  13282. ADDQ $0x04, AX
  13283. JMP memmove_match_emit_encodeSnappyBlockAsm10B
  13284. three_bytes_match_emit_encodeSnappyBlockAsm10B:
  13285. MOVB $0xf4, (AX)
  13286. MOVW DI, 1(AX)
  13287. ADDQ $0x03, AX
  13288. JMP memmove_match_emit_encodeSnappyBlockAsm10B
  13289. two_bytes_match_emit_encodeSnappyBlockAsm10B:
  13290. MOVB $0xf0, (AX)
  13291. MOVB DI, 1(AX)
  13292. ADDQ $0x02, AX
  13293. JMP memmove_match_emit_encodeSnappyBlockAsm10B
  13294. one_byte_match_emit_encodeSnappyBlockAsm10B:
  13295. SHLB $0x02, DI
  13296. MOVB DI, (AX)
  13297. ADDQ $0x01, AX
  13298. memmove_match_emit_encodeSnappyBlockAsm10B:
  13299. LEAQ (AX)(R8*1), DI
  13300. NOP
  13301. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_tail:
  13302. TESTQ R8, R8
  13303. JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13304. CMPQ R8, $0x02
  13305. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2
  13306. CMPQ R8, $0x04
  13307. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3
  13308. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4
  13309. CMPQ R8, $0x08
  13310. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_5through7
  13311. JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
  13312. CMPQ R8, $0x10
  13313. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_9through16
  13314. CMPQ R8, $0x20
  13315. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
  13316. CMPQ R8, $0x40
  13317. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
  13318. CMPQ R8, $0x80
  13319. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_65through128
  13320. CMPQ R8, $0x00000100
  13321. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_129through256
  13322. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048
  13323. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2:
  13324. MOVB (SI), R9
  13325. MOVB -1(SI)(R8*1), SI
  13326. MOVB R9, (AX)
  13327. MOVB SI, -1(AX)(R8*1)
  13328. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13329. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4:
  13330. MOVL (SI), R9
  13331. MOVL R9, (AX)
  13332. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13333. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3:
  13334. MOVW (SI), R9
  13335. MOVB 2(SI), SI
  13336. MOVW R9, (AX)
  13337. MOVB SI, 2(AX)
  13338. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13339. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_5through7:
  13340. MOVL (SI), R9
  13341. MOVL -4(SI)(R8*1), SI
  13342. MOVL R9, (AX)
  13343. MOVL SI, -4(AX)(R8*1)
  13344. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13345. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
  13346. MOVQ (SI), R9
  13347. MOVQ R9, (AX)
  13348. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13349. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_9through16:
  13350. MOVQ (SI), R9
  13351. MOVQ -8(SI)(R8*1), SI
  13352. MOVQ R9, (AX)
  13353. MOVQ SI, -8(AX)(R8*1)
  13354. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13355. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
  13356. MOVOU (SI), X0
  13357. MOVOU -16(SI)(R8*1), X1
  13358. MOVOU X0, (AX)
  13359. MOVOU X1, -16(AX)(R8*1)
  13360. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13361. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
  13362. MOVOU (SI), X0
  13363. MOVOU 16(SI), X1
  13364. MOVOU -32(SI)(R8*1), X2
  13365. MOVOU -16(SI)(R8*1), X3
  13366. MOVOU X0, (AX)
  13367. MOVOU X1, 16(AX)
  13368. MOVOU X2, -32(AX)(R8*1)
  13369. MOVOU X3, -16(AX)(R8*1)
  13370. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13371. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_65through128:
  13372. MOVOU (SI), X0
  13373. MOVOU 16(SI), X1
  13374. MOVOU 32(SI), X2
  13375. MOVOU 48(SI), X3
  13376. MOVOU -64(SI)(R8*1), X12
  13377. MOVOU -48(SI)(R8*1), X13
  13378. MOVOU -32(SI)(R8*1), X14
  13379. MOVOU -16(SI)(R8*1), X15
  13380. MOVOU X0, (AX)
  13381. MOVOU X1, 16(AX)
  13382. MOVOU X2, 32(AX)
  13383. MOVOU X3, 48(AX)
  13384. MOVOU X12, -64(AX)(R8*1)
  13385. MOVOU X13, -48(AX)(R8*1)
  13386. MOVOU X14, -32(AX)(R8*1)
  13387. MOVOU X15, -16(AX)(R8*1)
  13388. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13389. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_129through256:
  13390. MOVOU (SI), X0
  13391. MOVOU 16(SI), X1
  13392. MOVOU 32(SI), X2
  13393. MOVOU 48(SI), X3
  13394. MOVOU 64(SI), X4
  13395. MOVOU 80(SI), X5
  13396. MOVOU 96(SI), X6
  13397. MOVOU 112(SI), X7
  13398. MOVOU -128(SI)(R8*1), X8
  13399. MOVOU -112(SI)(R8*1), X9
  13400. MOVOU -96(SI)(R8*1), X10
  13401. MOVOU -80(SI)(R8*1), X11
  13402. MOVOU -64(SI)(R8*1), X12
  13403. MOVOU -48(SI)(R8*1), X13
  13404. MOVOU -32(SI)(R8*1), X14
  13405. MOVOU -16(SI)(R8*1), X15
  13406. MOVOU X0, (AX)
  13407. MOVOU X1, 16(AX)
  13408. MOVOU X2, 32(AX)
  13409. MOVOU X3, 48(AX)
  13410. MOVOU X4, 64(AX)
  13411. MOVOU X5, 80(AX)
  13412. MOVOU X6, 96(AX)
  13413. MOVOU X7, 112(AX)
  13414. MOVOU X8, -128(AX)(R8*1)
  13415. MOVOU X9, -112(AX)(R8*1)
  13416. MOVOU X10, -96(AX)(R8*1)
  13417. MOVOU X11, -80(AX)(R8*1)
  13418. MOVOU X12, -64(AX)(R8*1)
  13419. MOVOU X13, -48(AX)(R8*1)
  13420. MOVOU X14, -32(AX)(R8*1)
  13421. MOVOU X15, -16(AX)(R8*1)
  13422. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  13423. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048:
  13424. LEAQ -256(R8), R8
  13425. MOVOU (SI), X0
  13426. MOVOU 16(SI), X1
  13427. MOVOU 32(SI), X2
  13428. MOVOU 48(SI), X3
  13429. MOVOU 64(SI), X4
  13430. MOVOU 80(SI), X5
  13431. MOVOU 96(SI), X6
  13432. MOVOU 112(SI), X7
  13433. MOVOU 128(SI), X8
  13434. MOVOU 144(SI), X9
  13435. MOVOU 160(SI), X10
  13436. MOVOU 176(SI), X11
  13437. MOVOU 192(SI), X12
  13438. MOVOU 208(SI), X13
  13439. MOVOU 224(SI), X14
  13440. MOVOU 240(SI), X15
  13441. MOVOU X0, (AX)
  13442. MOVOU X1, 16(AX)
  13443. MOVOU X2, 32(AX)
  13444. MOVOU X3, 48(AX)
  13445. MOVOU X4, 64(AX)
  13446. MOVOU X5, 80(AX)
  13447. MOVOU X6, 96(AX)
  13448. MOVOU X7, 112(AX)
  13449. MOVOU X8, 128(AX)
  13450. MOVOU X9, 144(AX)
  13451. MOVOU X10, 160(AX)
  13452. MOVOU X11, 176(AX)
  13453. MOVOU X12, 192(AX)
  13454. MOVOU X13, 208(AX)
  13455. MOVOU X14, 224(AX)
  13456. MOVOU X15, 240(AX)
  13457. CMPQ R8, $0x00000100
  13458. LEAQ 256(SI), SI
  13459. LEAQ 256(AX), AX
  13460. JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048
  13461. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_tail
  13462. memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
  13463. MOVQ DI, AX
  13464. emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
  13465. match_nolit_loop_encodeSnappyBlockAsm10B:
  13466. MOVL CX, SI
  13467. SUBL BP, SI
  13468. MOVL SI, 16(SP)
  13469. ADDL $0x04, CX
  13470. ADDL $0x04, BP
  13471. MOVQ src_len+32(FP), SI
  13472. SUBL CX, SI
  13473. LEAQ (DX)(CX*1), DI
  13474. LEAQ (DX)(BP*1), BP
  13475. XORL R9, R9
  13476. CMPL SI, $0x08
  13477. JL matchlen_single_match_nolit_encodeSnappyBlockAsm10B
  13478. matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B:
  13479. MOVQ (DI)(R9*1), R8
  13480. XORQ (BP)(R9*1), R8
  13481. TESTQ R8, R8
  13482. JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B
  13483. BSFQ R8, R8
  13484. SARQ $0x03, R8
  13485. LEAL (R9)(R8*1), R9
  13486. JMP match_nolit_end_encodeSnappyBlockAsm10B
  13487. matchlen_loop_match_nolit_encodeSnappyBlockAsm10B:
  13488. LEAL -8(SI), SI
  13489. LEAL 8(R9), R9
  13490. CMPL SI, $0x08
  13491. JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B
  13492. matchlen_single_match_nolit_encodeSnappyBlockAsm10B:
  13493. TESTL SI, SI
  13494. JZ match_nolit_end_encodeSnappyBlockAsm10B
  13495. matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B:
  13496. MOVB (DI)(R9*1), R8
  13497. CMPB (BP)(R9*1), R8
  13498. JNE match_nolit_end_encodeSnappyBlockAsm10B
  13499. LEAL 1(R9), R9
  13500. DECL SI
  13501. JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B
  13502. match_nolit_end_encodeSnappyBlockAsm10B:
  13503. ADDL R9, CX
  13504. MOVL 16(SP), BP
  13505. ADDL $0x04, R9
  13506. CMPL BP, $0x00010000
  13507. JL two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
  13508. four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10B:
  13509. CMPL R9, $0x40
  13510. JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B
  13511. MOVB $0xff, (AX)
  13512. MOVL BP, 1(AX)
  13513. LEAL -64(R9), R9
  13514. ADDQ $0x05, AX
  13515. CMPL R9, $0x04
  13516. JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B
  13517. JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10B
  13518. four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B:
  13519. TESTL R9, R9
  13520. JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
  13521. MOVB $0x03, BL
  13522. LEAL -4(BX)(R9*4), R9
  13523. MOVB R9, (AX)
  13524. MOVL BP, 1(AX)
  13525. ADDQ $0x05, AX
  13526. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
  13527. two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
  13528. CMPL R9, $0x40
  13529. JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
  13530. MOVB $0xee, (AX)
  13531. MOVW BP, 1(AX)
  13532. LEAL -60(R9), R9
  13533. ADDQ $0x03, AX
  13534. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
  13535. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
  13536. CMPL R9, $0x0c
  13537. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
  13538. CMPL BP, $0x00000800
  13539. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
  13540. MOVB $0x01, BL
  13541. LEAL -16(BX)(R9*4), R9
  13542. MOVB BP, 1(AX)
  13543. SHRL $0x08, BP
  13544. SHLL $0x05, BP
  13545. ORL BP, R9
  13546. MOVB R9, (AX)
  13547. ADDQ $0x02, AX
  13548. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
  13549. emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
  13550. MOVB $0x02, BL
  13551. LEAL -4(BX)(R9*4), R9
  13552. MOVB R9, (AX)
  13553. MOVW BP, 1(AX)
  13554. ADDQ $0x03, AX
  13555. match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
  13556. MOVL CX, 12(SP)
  13557. CMPL CX, 8(SP)
  13558. JGE emit_remainder_encodeSnappyBlockAsm10B
  13559. CMPQ AX, (SP)
  13560. JL match_nolit_dst_ok_encodeSnappyBlockAsm10B
  13561. MOVQ $0x00000000, ret+48(FP)
  13562. RET
  13563. match_nolit_dst_ok_encodeSnappyBlockAsm10B:
  13564. MOVQ -2(DX)(CX*1), SI
  13565. MOVQ $0x000000cf1bbcdcbb, BP
  13566. MOVQ SI, DI
  13567. SHRQ $0x10, SI
  13568. MOVQ SI, R8
  13569. SHLQ $0x18, DI
  13570. IMULQ BP, DI
  13571. SHRQ $0x36, DI
  13572. SHLQ $0x18, R8
  13573. IMULQ BP, R8
  13574. SHRQ $0x36, R8
  13575. LEAL -2(CX), R9
  13576. MOVL 24(SP)(R8*4), BP
  13577. MOVL R9, 24(SP)(DI*4)
  13578. MOVL CX, 24(SP)(R8*4)
  13579. CMPL (DX)(BP*1), SI
  13580. JEQ match_nolit_loop_encodeSnappyBlockAsm10B
  13581. INCL CX
  13582. JMP search_loop_encodeSnappyBlockAsm10B
  13583. emit_remainder_encodeSnappyBlockAsm10B:
  13584. MOVQ src_len+32(FP), CX
  13585. SUBL 12(SP), CX
  13586. LEAQ 4(AX)(CX*1), CX
  13587. CMPQ CX, (SP)
  13588. JL emit_remainder_ok_encodeSnappyBlockAsm10B
  13589. MOVQ $0x00000000, ret+48(FP)
  13590. RET
  13591. emit_remainder_ok_encodeSnappyBlockAsm10B:
  13592. MOVQ src_len+32(FP), CX
  13593. MOVL 12(SP), BX
  13594. CMPL BX, CX
  13595. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
  13596. MOVL CX, BP
  13597. MOVL CX, 12(SP)
  13598. LEAQ (DX)(BX*1), CX
  13599. SUBL BX, BP
  13600. MOVL BP, DX
  13601. SUBL $0x01, DX
  13602. JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
  13603. CMPL DX, $0x3c
  13604. JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B
  13605. CMPL DX, $0x00000100
  13606. JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B
  13607. CMPL DX, $0x00010000
  13608. JLT three_bytes_emit_remainder_encodeSnappyBlockAsm10B
  13609. CMPL DX, $0x01000000
  13610. JLT four_bytes_emit_remainder_encodeSnappyBlockAsm10B
  13611. MOVB $0xfc, (AX)
  13612. MOVL DX, 1(AX)
  13613. ADDQ $0x05, AX
  13614. JMP memmove_emit_remainder_encodeSnappyBlockAsm10B
  13615. four_bytes_emit_remainder_encodeSnappyBlockAsm10B:
  13616. MOVL DX, BX
  13617. SHRL $0x10, BX
  13618. MOVB $0xf8, (AX)
  13619. MOVW DX, 1(AX)
  13620. MOVB BL, 3(AX)
  13621. ADDQ $0x04, AX
  13622. JMP memmove_emit_remainder_encodeSnappyBlockAsm10B
  13623. three_bytes_emit_remainder_encodeSnappyBlockAsm10B:
  13624. MOVB $0xf4, (AX)
  13625. MOVW DX, 1(AX)
  13626. ADDQ $0x03, AX
  13627. JMP memmove_emit_remainder_encodeSnappyBlockAsm10B
  13628. two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
  13629. MOVB $0xf0, (AX)
  13630. MOVB DL, 1(AX)
  13631. ADDQ $0x02, AX
  13632. JMP memmove_emit_remainder_encodeSnappyBlockAsm10B
  13633. one_byte_emit_remainder_encodeSnappyBlockAsm10B:
  13634. SHLB $0x02, DL
  13635. MOVB DL, (AX)
  13636. ADDQ $0x01, AX
  13637. memmove_emit_remainder_encodeSnappyBlockAsm10B:
  13638. LEAQ (AX)(BP*1), DX
  13639. MOVL BP, BX
  13640. NOP
  13641. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_tail:
  13642. TESTQ BX, BX
  13643. JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13644. CMPQ BX, $0x02
  13645. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2
  13646. CMPQ BX, $0x04
  13647. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3
  13648. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4
  13649. CMPQ BX, $0x08
  13650. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_5through7
  13651. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8
  13652. CMPQ BX, $0x10
  13653. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_9through16
  13654. CMPQ BX, $0x20
  13655. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
  13656. CMPQ BX, $0x40
  13657. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64
  13658. CMPQ BX, $0x80
  13659. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_65through128
  13660. CMPQ BX, $0x00000100
  13661. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_129through256
  13662. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_256through2048
  13663. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
  13664. MOVB (CX), BP
  13665. MOVB -1(CX)(BX*1), CL
  13666. MOVB BP, (AX)
  13667. MOVB CL, -1(AX)(BX*1)
  13668. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13669. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4:
  13670. MOVL (CX), BP
  13671. MOVL BP, (AX)
  13672. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13673. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
  13674. MOVW (CX), BP
  13675. MOVB 2(CX), CL
  13676. MOVW BP, (AX)
  13677. MOVB CL, 2(AX)
  13678. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13679. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_5through7:
  13680. MOVL (CX), BP
  13681. MOVL -4(CX)(BX*1), CX
  13682. MOVL BP, (AX)
  13683. MOVL CX, -4(AX)(BX*1)
  13684. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13685. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8:
  13686. MOVQ (CX), BP
  13687. MOVQ BP, (AX)
  13688. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13689. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_9through16:
  13690. MOVQ (CX), BP
  13691. MOVQ -8(CX)(BX*1), CX
  13692. MOVQ BP, (AX)
  13693. MOVQ CX, -8(AX)(BX*1)
  13694. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13695. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
  13696. MOVOU (CX), X0
  13697. MOVOU -16(CX)(BX*1), X1
  13698. MOVOU X0, (AX)
  13699. MOVOU X1, -16(AX)(BX*1)
  13700. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13701. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
  13702. MOVOU (CX), X0
  13703. MOVOU 16(CX), X1
  13704. MOVOU -32(CX)(BX*1), X2
  13705. MOVOU -16(CX)(BX*1), X3
  13706. MOVOU X0, (AX)
  13707. MOVOU X1, 16(AX)
  13708. MOVOU X2, -32(AX)(BX*1)
  13709. MOVOU X3, -16(AX)(BX*1)
  13710. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13711. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_65through128:
  13712. MOVOU (CX), X0
  13713. MOVOU 16(CX), X1
  13714. MOVOU 32(CX), X2
  13715. MOVOU 48(CX), X3
  13716. MOVOU -64(CX)(BX*1), X12
  13717. MOVOU -48(CX)(BX*1), X13
  13718. MOVOU -32(CX)(BX*1), X14
  13719. MOVOU -16(CX)(BX*1), X15
  13720. MOVOU X0, (AX)
  13721. MOVOU X1, 16(AX)
  13722. MOVOU X2, 32(AX)
  13723. MOVOU X3, 48(AX)
  13724. MOVOU X12, -64(AX)(BX*1)
  13725. MOVOU X13, -48(AX)(BX*1)
  13726. MOVOU X14, -32(AX)(BX*1)
  13727. MOVOU X15, -16(AX)(BX*1)
  13728. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13729. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_129through256:
  13730. MOVOU (CX), X0
  13731. MOVOU 16(CX), X1
  13732. MOVOU 32(CX), X2
  13733. MOVOU 48(CX), X3
  13734. MOVOU 64(CX), X4
  13735. MOVOU 80(CX), X5
  13736. MOVOU 96(CX), X6
  13737. MOVOU 112(CX), X7
  13738. MOVOU -128(CX)(BX*1), X8
  13739. MOVOU -112(CX)(BX*1), X9
  13740. MOVOU -96(CX)(BX*1), X10
  13741. MOVOU -80(CX)(BX*1), X11
  13742. MOVOU -64(CX)(BX*1), X12
  13743. MOVOU -48(CX)(BX*1), X13
  13744. MOVOU -32(CX)(BX*1), X14
  13745. MOVOU -16(CX)(BX*1), X15
  13746. MOVOU X0, (AX)
  13747. MOVOU X1, 16(AX)
  13748. MOVOU X2, 32(AX)
  13749. MOVOU X3, 48(AX)
  13750. MOVOU X4, 64(AX)
  13751. MOVOU X5, 80(AX)
  13752. MOVOU X6, 96(AX)
  13753. MOVOU X7, 112(AX)
  13754. MOVOU X8, -128(AX)(BX*1)
  13755. MOVOU X9, -112(AX)(BX*1)
  13756. MOVOU X10, -96(AX)(BX*1)
  13757. MOVOU X11, -80(AX)(BX*1)
  13758. MOVOU X12, -64(AX)(BX*1)
  13759. MOVOU X13, -48(AX)(BX*1)
  13760. MOVOU X14, -32(AX)(BX*1)
  13761. MOVOU X15, -16(AX)(BX*1)
  13762. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  13763. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_256through2048:
  13764. LEAQ -256(BX), BX
  13765. MOVOU (CX), X0
  13766. MOVOU 16(CX), X1
  13767. MOVOU 32(CX), X2
  13768. MOVOU 48(CX), X3
  13769. MOVOU 64(CX), X4
  13770. MOVOU 80(CX), X5
  13771. MOVOU 96(CX), X6
  13772. MOVOU 112(CX), X7
  13773. MOVOU 128(CX), X8
  13774. MOVOU 144(CX), X9
  13775. MOVOU 160(CX), X10
  13776. MOVOU 176(CX), X11
  13777. MOVOU 192(CX), X12
  13778. MOVOU 208(CX), X13
  13779. MOVOU 224(CX), X14
  13780. MOVOU 240(CX), X15
  13781. MOVOU X0, (AX)
  13782. MOVOU X1, 16(AX)
  13783. MOVOU X2, 32(AX)
  13784. MOVOU X3, 48(AX)
  13785. MOVOU X4, 64(AX)
  13786. MOVOU X5, 80(AX)
  13787. MOVOU X6, 96(AX)
  13788. MOVOU X7, 112(AX)
  13789. MOVOU X8, 128(AX)
  13790. MOVOU X9, 144(AX)
  13791. MOVOU X10, 160(AX)
  13792. MOVOU X11, 176(AX)
  13793. MOVOU X12, 192(AX)
  13794. MOVOU X13, 208(AX)
  13795. MOVOU X14, 224(AX)
  13796. MOVOU X15, 240(AX)
  13797. CMPQ BX, $0x00000100
  13798. LEAQ 256(CX), CX
  13799. LEAQ 256(AX), AX
  13800. JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_256through2048
  13801. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_tail
  13802. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
  13803. MOVQ DX, AX
  13804. emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
  13805. MOVQ dst_base+0(FP), CX
  13806. SUBQ CX, AX
  13807. MOVQ AX, ret+48(FP)
  13808. RET
  13809. // func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
  13810. // Requires: SSE2
  13811. TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
  13812. MOVQ dst_base+0(FP), AX
  13813. MOVQ $0x00000008, CX
  13814. LEAQ 24(SP), DX
  13815. PXOR X0, X0
  13816. zero_loop_encodeSnappyBlockAsm8B:
  13817. MOVOU X0, (DX)
  13818. MOVOU X0, 16(DX)
  13819. MOVOU X0, 32(DX)
  13820. MOVOU X0, 48(DX)
  13821. MOVOU X0, 64(DX)
  13822. MOVOU X0, 80(DX)
  13823. MOVOU X0, 96(DX)
  13824. MOVOU X0, 112(DX)
  13825. ADDQ $0x80, DX
  13826. DECQ CX
  13827. JNZ zero_loop_encodeSnappyBlockAsm8B
  13828. MOVL $0x00000000, 12(SP)
  13829. MOVQ src_len+32(FP), CX
  13830. LEAQ -5(CX), DX
  13831. LEAQ -8(CX), BP
  13832. MOVL BP, 8(SP)
  13833. SHRQ $0x05, CX
  13834. SUBL CX, DX
  13835. LEAQ (AX)(DX*1), DX
  13836. MOVQ DX, (SP)
  13837. MOVL $0x00000001, CX
  13838. MOVL CX, 16(SP)
  13839. MOVQ src_base+24(FP), DX
  13840. search_loop_encodeSnappyBlockAsm8B:
  13841. MOVQ (DX)(CX*1), SI
  13842. MOVL CX, BP
  13843. SUBL 12(SP), BP
  13844. SHRL $0x04, BP
  13845. LEAL 4(CX)(BP*1), BP
  13846. MOVL 8(SP), DI
  13847. CMPL BP, DI
  13848. JGT emit_remainder_encodeSnappyBlockAsm8B
  13849. MOVL BP, 20(SP)
  13850. MOVQ $0x9e3779b1, R8
  13851. MOVQ SI, R9
  13852. MOVQ SI, R10
  13853. SHRQ $0x08, R10
  13854. SHLQ $0x20, R9
  13855. IMULQ R8, R9
  13856. SHRQ $0x38, R9
  13857. SHLQ $0x20, R10
  13858. IMULQ R8, R10
  13859. SHRQ $0x38, R10
  13860. MOVL 24(SP)(R9*4), BP
  13861. MOVL 24(SP)(R10*4), DI
  13862. MOVL CX, 24(SP)(R9*4)
  13863. LEAL 1(CX), R9
  13864. MOVL R9, 24(SP)(R10*4)
  13865. MOVQ SI, R9
  13866. SHRQ $0x10, R9
  13867. SHLQ $0x20, R9
  13868. IMULQ R8, R9
  13869. SHRQ $0x38, R9
  13870. MOVL CX, R8
  13871. SUBL 16(SP), R8
  13872. MOVL 1(DX)(R8*1), R10
  13873. MOVQ SI, R8
  13874. SHRQ $0x08, R8
  13875. CMPL R8, R10
  13876. JNE no_repeat_found_encodeSnappyBlockAsm8B
  13877. LEAL 1(CX), SI
  13878. MOVL 12(SP), BP
  13879. MOVL SI, DI
  13880. SUBL 16(SP), DI
  13881. JZ repeat_extend_back_end_encodeSnappyBlockAsm8B
  13882. repeat_extend_back_loop_encodeSnappyBlockAsm8B:
  13883. CMPL SI, BP
  13884. JLE repeat_extend_back_end_encodeSnappyBlockAsm8B
  13885. MOVB -1(DX)(DI*1), BL
  13886. MOVB -1(DX)(SI*1), R8
  13887. CMPB BL, R8
  13888. JNE repeat_extend_back_end_encodeSnappyBlockAsm8B
  13889. LEAL -1(SI), SI
  13890. DECL DI
  13891. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B
  13892. repeat_extend_back_end_encodeSnappyBlockAsm8B:
  13893. MOVL 12(SP), BP
  13894. CMPL BP, SI
  13895. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
  13896. MOVL SI, DI
  13897. MOVL SI, 12(SP)
  13898. LEAQ (DX)(BP*1), R8
  13899. SUBL BP, DI
  13900. MOVL DI, BP
  13901. SUBL $0x01, BP
  13902. JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
  13903. CMPL BP, $0x3c
  13904. JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B
  13905. CMPL BP, $0x00000100
  13906. JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B
  13907. CMPL BP, $0x00010000
  13908. JLT three_bytes_repeat_emit_encodeSnappyBlockAsm8B
  13909. CMPL BP, $0x01000000
  13910. JLT four_bytes_repeat_emit_encodeSnappyBlockAsm8B
  13911. MOVB $0xfc, (AX)
  13912. MOVL BP, 1(AX)
  13913. ADDQ $0x05, AX
  13914. JMP memmove_repeat_emit_encodeSnappyBlockAsm8B
  13915. four_bytes_repeat_emit_encodeSnappyBlockAsm8B:
  13916. MOVL BP, R9
  13917. SHRL $0x10, R9
  13918. MOVB $0xf8, (AX)
  13919. MOVW BP, 1(AX)
  13920. MOVB R9, 3(AX)
  13921. ADDQ $0x04, AX
  13922. JMP memmove_repeat_emit_encodeSnappyBlockAsm8B
  13923. three_bytes_repeat_emit_encodeSnappyBlockAsm8B:
  13924. MOVB $0xf4, (AX)
  13925. MOVW BP, 1(AX)
  13926. ADDQ $0x03, AX
  13927. JMP memmove_repeat_emit_encodeSnappyBlockAsm8B
  13928. two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
  13929. MOVB $0xf0, (AX)
  13930. MOVB BP, 1(AX)
  13931. ADDQ $0x02, AX
  13932. JMP memmove_repeat_emit_encodeSnappyBlockAsm8B
  13933. one_byte_repeat_emit_encodeSnappyBlockAsm8B:
  13934. SHLB $0x02, BP
  13935. MOVB BP, (AX)
  13936. ADDQ $0x01, AX
  13937. memmove_repeat_emit_encodeSnappyBlockAsm8B:
  13938. LEAQ (AX)(DI*1), BP
  13939. NOP
  13940. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_tail:
  13941. TESTQ DI, DI
  13942. JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  13943. CMPQ DI, $0x02
  13944. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2
  13945. CMPQ DI, $0x04
  13946. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3
  13947. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4
  13948. CMPQ DI, $0x08
  13949. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_5through7
  13950. JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
  13951. CMPQ DI, $0x10
  13952. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_9through16
  13953. CMPQ DI, $0x20
  13954. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
  13955. CMPQ DI, $0x40
  13956. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
  13957. CMPQ DI, $0x80
  13958. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_65through128
  13959. CMPQ DI, $0x00000100
  13960. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_129through256
  13961. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048
  13962. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2:
  13963. MOVB (R8), R9
  13964. MOVB -1(R8)(DI*1), R8
  13965. MOVB R9, (AX)
  13966. MOVB R8, -1(AX)(DI*1)
  13967. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  13968. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4:
  13969. MOVL (R8), R9
  13970. MOVL R9, (AX)
  13971. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  13972. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3:
  13973. MOVW (R8), R9
  13974. MOVB 2(R8), R8
  13975. MOVW R9, (AX)
  13976. MOVB R8, 2(AX)
  13977. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  13978. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_5through7:
  13979. MOVL (R8), R9
  13980. MOVL -4(R8)(DI*1), R8
  13981. MOVL R9, (AX)
  13982. MOVL R8, -4(AX)(DI*1)
  13983. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  13984. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
  13985. MOVQ (R8), R9
  13986. MOVQ R9, (AX)
  13987. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  13988. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_9through16:
  13989. MOVQ (R8), R9
  13990. MOVQ -8(R8)(DI*1), R8
  13991. MOVQ R9, (AX)
  13992. MOVQ R8, -8(AX)(DI*1)
  13993. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  13994. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
  13995. MOVOU (R8), X0
  13996. MOVOU -16(R8)(DI*1), X1
  13997. MOVOU X0, (AX)
  13998. MOVOU X1, -16(AX)(DI*1)
  13999. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  14000. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
  14001. MOVOU (R8), X0
  14002. MOVOU 16(R8), X1
  14003. MOVOU -32(R8)(DI*1), X2
  14004. MOVOU -16(R8)(DI*1), X3
  14005. MOVOU X0, (AX)
  14006. MOVOU X1, 16(AX)
  14007. MOVOU X2, -32(AX)(DI*1)
  14008. MOVOU X3, -16(AX)(DI*1)
  14009. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  14010. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_65through128:
  14011. MOVOU (R8), X0
  14012. MOVOU 16(R8), X1
  14013. MOVOU 32(R8), X2
  14014. MOVOU 48(R8), X3
  14015. MOVOU -64(R8)(DI*1), X12
  14016. MOVOU -48(R8)(DI*1), X13
  14017. MOVOU -32(R8)(DI*1), X14
  14018. MOVOU -16(R8)(DI*1), X15
  14019. MOVOU X0, (AX)
  14020. MOVOU X1, 16(AX)
  14021. MOVOU X2, 32(AX)
  14022. MOVOU X3, 48(AX)
  14023. MOVOU X12, -64(AX)(DI*1)
  14024. MOVOU X13, -48(AX)(DI*1)
  14025. MOVOU X14, -32(AX)(DI*1)
  14026. MOVOU X15, -16(AX)(DI*1)
  14027. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  14028. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_129through256:
  14029. MOVOU (R8), X0
  14030. MOVOU 16(R8), X1
  14031. MOVOU 32(R8), X2
  14032. MOVOU 48(R8), X3
  14033. MOVOU 64(R8), X4
  14034. MOVOU 80(R8), X5
  14035. MOVOU 96(R8), X6
  14036. MOVOU 112(R8), X7
  14037. MOVOU -128(R8)(DI*1), X8
  14038. MOVOU -112(R8)(DI*1), X9
  14039. MOVOU -96(R8)(DI*1), X10
  14040. MOVOU -80(R8)(DI*1), X11
  14041. MOVOU -64(R8)(DI*1), X12
  14042. MOVOU -48(R8)(DI*1), X13
  14043. MOVOU -32(R8)(DI*1), X14
  14044. MOVOU -16(R8)(DI*1), X15
  14045. MOVOU X0, (AX)
  14046. MOVOU X1, 16(AX)
  14047. MOVOU X2, 32(AX)
  14048. MOVOU X3, 48(AX)
  14049. MOVOU X4, 64(AX)
  14050. MOVOU X5, 80(AX)
  14051. MOVOU X6, 96(AX)
  14052. MOVOU X7, 112(AX)
  14053. MOVOU X8, -128(AX)(DI*1)
  14054. MOVOU X9, -112(AX)(DI*1)
  14055. MOVOU X10, -96(AX)(DI*1)
  14056. MOVOU X11, -80(AX)(DI*1)
  14057. MOVOU X12, -64(AX)(DI*1)
  14058. MOVOU X13, -48(AX)(DI*1)
  14059. MOVOU X14, -32(AX)(DI*1)
  14060. MOVOU X15, -16(AX)(DI*1)
  14061. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  14062. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048:
  14063. LEAQ -256(DI), DI
  14064. MOVOU (R8), X0
  14065. MOVOU 16(R8), X1
  14066. MOVOU 32(R8), X2
  14067. MOVOU 48(R8), X3
  14068. MOVOU 64(R8), X4
  14069. MOVOU 80(R8), X5
  14070. MOVOU 96(R8), X6
  14071. MOVOU 112(R8), X7
  14072. MOVOU 128(R8), X8
  14073. MOVOU 144(R8), X9
  14074. MOVOU 160(R8), X10
  14075. MOVOU 176(R8), X11
  14076. MOVOU 192(R8), X12
  14077. MOVOU 208(R8), X13
  14078. MOVOU 224(R8), X14
  14079. MOVOU 240(R8), X15
  14080. MOVOU X0, (AX)
  14081. MOVOU X1, 16(AX)
  14082. MOVOU X2, 32(AX)
  14083. MOVOU X3, 48(AX)
  14084. MOVOU X4, 64(AX)
  14085. MOVOU X5, 80(AX)
  14086. MOVOU X6, 96(AX)
  14087. MOVOU X7, 112(AX)
  14088. MOVOU X8, 128(AX)
  14089. MOVOU X9, 144(AX)
  14090. MOVOU X10, 160(AX)
  14091. MOVOU X11, 176(AX)
  14092. MOVOU X12, 192(AX)
  14093. MOVOU X13, 208(AX)
  14094. MOVOU X14, 224(AX)
  14095. MOVOU X15, 240(AX)
  14096. CMPQ DI, $0x00000100
  14097. LEAQ 256(R8), R8
  14098. LEAQ 256(AX), AX
  14099. JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048
  14100. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_tail
  14101. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
  14102. MOVQ BP, AX
  14103. emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
  14104. ADDL $0x05, CX
  14105. MOVL CX, BP
  14106. SUBL 16(SP), BP
  14107. MOVQ src_len+32(FP), DI
  14108. SUBL CX, DI
  14109. LEAQ (DX)(CX*1), R8
  14110. LEAQ (DX)(BP*1), BP
  14111. XORL R10, R10
  14112. CMPL DI, $0x08
  14113. JL matchlen_single_repeat_extend
  14114. matchlen_loopback_repeat_extend:
  14115. MOVQ (R8)(R10*1), R9
  14116. XORQ (BP)(R10*1), R9
  14117. TESTQ R9, R9
  14118. JZ matchlen_loop_repeat_extend
  14119. BSFQ R9, R9
  14120. SARQ $0x03, R9
  14121. LEAL (R10)(R9*1), R10
  14122. JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B
  14123. matchlen_loop_repeat_extend:
  14124. LEAL -8(DI), DI
  14125. LEAL 8(R10), R10
  14126. CMPL DI, $0x08
  14127. JGE matchlen_loopback_repeat_extend
  14128. matchlen_single_repeat_extend:
  14129. TESTL DI, DI
  14130. JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B
  14131. matchlen_single_loopback_repeat_extend:
  14132. MOVB (R8)(R10*1), R9
  14133. CMPB (BP)(R10*1), R9
  14134. JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B
  14135. LEAL 1(R10), R10
  14136. DECL DI
  14137. JNZ matchlen_single_loopback_repeat_extend
  14138. repeat_extend_forward_end_encodeSnappyBlockAsm8B:
  14139. ADDL R10, CX
  14140. MOVL CX, BP
  14141. SUBL SI, BP
  14142. MOVL 16(SP), SI
  14143. CMPL SI, $0x00010000
  14144. JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
  14145. four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8B:
  14146. CMPL BP, $0x40
  14147. JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B
  14148. MOVB $0xff, (AX)
  14149. MOVL SI, 1(AX)
  14150. LEAL -64(BP), BP
  14151. ADDQ $0x05, AX
  14152. CMPL BP, $0x04
  14153. JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B
  14154. JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8B
  14155. four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B:
  14156. TESTL BP, BP
  14157. JZ repeat_end_emit_encodeSnappyBlockAsm8B
  14158. MOVB $0x03, BL
  14159. LEAL -4(BX)(BP*4), BP
  14160. MOVB BP, (AX)
  14161. MOVL SI, 1(AX)
  14162. ADDQ $0x05, AX
  14163. JMP repeat_end_emit_encodeSnappyBlockAsm8B
  14164. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
  14165. CMPL BP, $0x40
  14166. JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
  14167. MOVB $0xee, (AX)
  14168. MOVW SI, 1(AX)
  14169. LEAL -60(BP), BP
  14170. ADDQ $0x03, AX
  14171. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
  14172. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
  14173. CMPL BP, $0x0c
  14174. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
  14175. CMPL SI, $0x00000800
  14176. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
  14177. MOVB $0x01, BL
  14178. LEAL -16(BX)(BP*4), BP
  14179. MOVB SI, 1(AX)
  14180. SHRL $0x08, SI
  14181. SHLL $0x05, SI
  14182. ORL SI, BP
  14183. MOVB BP, (AX)
  14184. ADDQ $0x02, AX
  14185. JMP repeat_end_emit_encodeSnappyBlockAsm8B
  14186. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
  14187. MOVB $0x02, BL
  14188. LEAL -4(BX)(BP*4), BP
  14189. MOVB BP, (AX)
  14190. MOVW SI, 1(AX)
  14191. ADDQ $0x03, AX
  14192. repeat_end_emit_encodeSnappyBlockAsm8B:
  14193. MOVL CX, 12(SP)
  14194. CMPL CX, 8(SP)
  14195. JGE emit_remainder_encodeSnappyBlockAsm8B
  14196. JMP search_loop_encodeSnappyBlockAsm8B
  14197. no_repeat_found_encodeSnappyBlockAsm8B:
  14198. CMPL (DX)(BP*1), SI
  14199. JEQ candidate_match_encodeSnappyBlockAsm8B
  14200. SHRQ $0x08, SI
  14201. MOVL 24(SP)(R9*4), BP
  14202. LEAL 2(CX), R8
  14203. CMPL (DX)(DI*1), SI
  14204. JEQ candidate2_match_encodeSnappyBlockAsm8B
  14205. MOVL R8, 24(SP)(R9*4)
  14206. SHRQ $0x08, SI
  14207. CMPL (DX)(BP*1), SI
  14208. JEQ candidate3_match_encodeSnappyBlockAsm8B
  14209. MOVL 20(SP), CX
  14210. JMP search_loop_encodeSnappyBlockAsm8B
  14211. candidate3_match_encodeSnappyBlockAsm8B:
  14212. ADDL $0x02, CX
  14213. JMP candidate_match_encodeSnappyBlockAsm8B
  14214. candidate2_match_encodeSnappyBlockAsm8B:
  14215. MOVL R8, 24(SP)(R9*4)
  14216. INCL CX
  14217. MOVL DI, BP
  14218. candidate_match_encodeSnappyBlockAsm8B:
  14219. MOVL 12(SP), SI
  14220. TESTL BP, BP
  14221. JZ match_extend_back_end_encodeSnappyBlockAsm8B
  14222. match_extend_back_loop_encodeSnappyBlockAsm8B:
  14223. CMPL CX, SI
  14224. JLE match_extend_back_end_encodeSnappyBlockAsm8B
  14225. MOVB -1(DX)(BP*1), BL
  14226. MOVB -1(DX)(CX*1), DI
  14227. CMPB BL, DI
  14228. JNE match_extend_back_end_encodeSnappyBlockAsm8B
  14229. LEAL -1(CX), CX
  14230. DECL BP
  14231. JZ match_extend_back_end_encodeSnappyBlockAsm8B
  14232. JMP match_extend_back_loop_encodeSnappyBlockAsm8B
  14233. match_extend_back_end_encodeSnappyBlockAsm8B:
  14234. MOVL CX, SI
  14235. SUBL 12(SP), SI
  14236. LEAQ 4(AX)(SI*1), SI
  14237. CMPQ SI, (SP)
  14238. JL match_dst_size_check_encodeSnappyBlockAsm8B
  14239. MOVQ $0x00000000, ret+48(FP)
  14240. RET
  14241. match_dst_size_check_encodeSnappyBlockAsm8B:
  14242. MOVL CX, SI
  14243. MOVL 12(SP), DI
  14244. CMPL DI, SI
  14245. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B
  14246. MOVL SI, R8
  14247. MOVL SI, 12(SP)
  14248. LEAQ (DX)(DI*1), SI
  14249. SUBL DI, R8
  14250. MOVL R8, DI
  14251. SUBL $0x01, DI
  14252. JC emit_literal_done_match_emit_encodeSnappyBlockAsm8B
  14253. CMPL DI, $0x3c
  14254. JLT one_byte_match_emit_encodeSnappyBlockAsm8B
  14255. CMPL DI, $0x00000100
  14256. JLT two_bytes_match_emit_encodeSnappyBlockAsm8B
  14257. CMPL DI, $0x00010000
  14258. JLT three_bytes_match_emit_encodeSnappyBlockAsm8B
  14259. CMPL DI, $0x01000000
  14260. JLT four_bytes_match_emit_encodeSnappyBlockAsm8B
  14261. MOVB $0xfc, (AX)
  14262. MOVL DI, 1(AX)
  14263. ADDQ $0x05, AX
  14264. JMP memmove_match_emit_encodeSnappyBlockAsm8B
  14265. four_bytes_match_emit_encodeSnappyBlockAsm8B:
  14266. MOVL DI, R9
  14267. SHRL $0x10, R9
  14268. MOVB $0xf8, (AX)
  14269. MOVW DI, 1(AX)
  14270. MOVB R9, 3(AX)
  14271. ADDQ $0x04, AX
  14272. JMP memmove_match_emit_encodeSnappyBlockAsm8B
  14273. three_bytes_match_emit_encodeSnappyBlockAsm8B:
  14274. MOVB $0xf4, (AX)
  14275. MOVW DI, 1(AX)
  14276. ADDQ $0x03, AX
  14277. JMP memmove_match_emit_encodeSnappyBlockAsm8B
  14278. two_bytes_match_emit_encodeSnappyBlockAsm8B:
  14279. MOVB $0xf0, (AX)
  14280. MOVB DI, 1(AX)
  14281. ADDQ $0x02, AX
  14282. JMP memmove_match_emit_encodeSnappyBlockAsm8B
  14283. one_byte_match_emit_encodeSnappyBlockAsm8B:
  14284. SHLB $0x02, DI
  14285. MOVB DI, (AX)
  14286. ADDQ $0x01, AX
  14287. memmove_match_emit_encodeSnappyBlockAsm8B:
  14288. LEAQ (AX)(R8*1), DI
  14289. NOP
  14290. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_tail:
  14291. TESTQ R8, R8
  14292. JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14293. CMPQ R8, $0x02
  14294. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2
  14295. CMPQ R8, $0x04
  14296. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3
  14297. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4
  14298. CMPQ R8, $0x08
  14299. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_5through7
  14300. JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
  14301. CMPQ R8, $0x10
  14302. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_9through16
  14303. CMPQ R8, $0x20
  14304. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
  14305. CMPQ R8, $0x40
  14306. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
  14307. CMPQ R8, $0x80
  14308. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_65through128
  14309. CMPQ R8, $0x00000100
  14310. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_129through256
  14311. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048
  14312. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2:
  14313. MOVB (SI), R9
  14314. MOVB -1(SI)(R8*1), SI
  14315. MOVB R9, (AX)
  14316. MOVB SI, -1(AX)(R8*1)
  14317. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14318. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4:
  14319. MOVL (SI), R9
  14320. MOVL R9, (AX)
  14321. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14322. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3:
  14323. MOVW (SI), R9
  14324. MOVB 2(SI), SI
  14325. MOVW R9, (AX)
  14326. MOVB SI, 2(AX)
  14327. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14328. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_5through7:
  14329. MOVL (SI), R9
  14330. MOVL -4(SI)(R8*1), SI
  14331. MOVL R9, (AX)
  14332. MOVL SI, -4(AX)(R8*1)
  14333. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14334. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
  14335. MOVQ (SI), R9
  14336. MOVQ R9, (AX)
  14337. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14338. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_9through16:
  14339. MOVQ (SI), R9
  14340. MOVQ -8(SI)(R8*1), SI
  14341. MOVQ R9, (AX)
  14342. MOVQ SI, -8(AX)(R8*1)
  14343. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14344. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
  14345. MOVOU (SI), X0
  14346. MOVOU -16(SI)(R8*1), X1
  14347. MOVOU X0, (AX)
  14348. MOVOU X1, -16(AX)(R8*1)
  14349. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14350. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
  14351. MOVOU (SI), X0
  14352. MOVOU 16(SI), X1
  14353. MOVOU -32(SI)(R8*1), X2
  14354. MOVOU -16(SI)(R8*1), X3
  14355. MOVOU X0, (AX)
  14356. MOVOU X1, 16(AX)
  14357. MOVOU X2, -32(AX)(R8*1)
  14358. MOVOU X3, -16(AX)(R8*1)
  14359. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14360. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_65through128:
  14361. MOVOU (SI), X0
  14362. MOVOU 16(SI), X1
  14363. MOVOU 32(SI), X2
  14364. MOVOU 48(SI), X3
  14365. MOVOU -64(SI)(R8*1), X12
  14366. MOVOU -48(SI)(R8*1), X13
  14367. MOVOU -32(SI)(R8*1), X14
  14368. MOVOU -16(SI)(R8*1), X15
  14369. MOVOU X0, (AX)
  14370. MOVOU X1, 16(AX)
  14371. MOVOU X2, 32(AX)
  14372. MOVOU X3, 48(AX)
  14373. MOVOU X12, -64(AX)(R8*1)
  14374. MOVOU X13, -48(AX)(R8*1)
  14375. MOVOU X14, -32(AX)(R8*1)
  14376. MOVOU X15, -16(AX)(R8*1)
  14377. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14378. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_129through256:
  14379. MOVOU (SI), X0
  14380. MOVOU 16(SI), X1
  14381. MOVOU 32(SI), X2
  14382. MOVOU 48(SI), X3
  14383. MOVOU 64(SI), X4
  14384. MOVOU 80(SI), X5
  14385. MOVOU 96(SI), X6
  14386. MOVOU 112(SI), X7
  14387. MOVOU -128(SI)(R8*1), X8
  14388. MOVOU -112(SI)(R8*1), X9
  14389. MOVOU -96(SI)(R8*1), X10
  14390. MOVOU -80(SI)(R8*1), X11
  14391. MOVOU -64(SI)(R8*1), X12
  14392. MOVOU -48(SI)(R8*1), X13
  14393. MOVOU -32(SI)(R8*1), X14
  14394. MOVOU -16(SI)(R8*1), X15
  14395. MOVOU X0, (AX)
  14396. MOVOU X1, 16(AX)
  14397. MOVOU X2, 32(AX)
  14398. MOVOU X3, 48(AX)
  14399. MOVOU X4, 64(AX)
  14400. MOVOU X5, 80(AX)
  14401. MOVOU X6, 96(AX)
  14402. MOVOU X7, 112(AX)
  14403. MOVOU X8, -128(AX)(R8*1)
  14404. MOVOU X9, -112(AX)(R8*1)
  14405. MOVOU X10, -96(AX)(R8*1)
  14406. MOVOU X11, -80(AX)(R8*1)
  14407. MOVOU X12, -64(AX)(R8*1)
  14408. MOVOU X13, -48(AX)(R8*1)
  14409. MOVOU X14, -32(AX)(R8*1)
  14410. MOVOU X15, -16(AX)(R8*1)
  14411. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  14412. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048:
  14413. LEAQ -256(R8), R8
  14414. MOVOU (SI), X0
  14415. MOVOU 16(SI), X1
  14416. MOVOU 32(SI), X2
  14417. MOVOU 48(SI), X3
  14418. MOVOU 64(SI), X4
  14419. MOVOU 80(SI), X5
  14420. MOVOU 96(SI), X6
  14421. MOVOU 112(SI), X7
  14422. MOVOU 128(SI), X8
  14423. MOVOU 144(SI), X9
  14424. MOVOU 160(SI), X10
  14425. MOVOU 176(SI), X11
  14426. MOVOU 192(SI), X12
  14427. MOVOU 208(SI), X13
  14428. MOVOU 224(SI), X14
  14429. MOVOU 240(SI), X15
  14430. MOVOU X0, (AX)
  14431. MOVOU X1, 16(AX)
  14432. MOVOU X2, 32(AX)
  14433. MOVOU X3, 48(AX)
  14434. MOVOU X4, 64(AX)
  14435. MOVOU X5, 80(AX)
  14436. MOVOU X6, 96(AX)
  14437. MOVOU X7, 112(AX)
  14438. MOVOU X8, 128(AX)
  14439. MOVOU X9, 144(AX)
  14440. MOVOU X10, 160(AX)
  14441. MOVOU X11, 176(AX)
  14442. MOVOU X12, 192(AX)
  14443. MOVOU X13, 208(AX)
  14444. MOVOU X14, 224(AX)
  14445. MOVOU X15, 240(AX)
  14446. CMPQ R8, $0x00000100
  14447. LEAQ 256(SI), SI
  14448. LEAQ 256(AX), AX
  14449. JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048
  14450. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_tail
  14451. memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
  14452. MOVQ DI, AX
  14453. emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
  14454. match_nolit_loop_encodeSnappyBlockAsm8B:
  14455. MOVL CX, SI
  14456. SUBL BP, SI
  14457. MOVL SI, 16(SP)
  14458. ADDL $0x04, CX
  14459. ADDL $0x04, BP
  14460. MOVQ src_len+32(FP), SI
  14461. SUBL CX, SI
  14462. LEAQ (DX)(CX*1), DI
  14463. LEAQ (DX)(BP*1), BP
  14464. XORL R9, R9
  14465. CMPL SI, $0x08
  14466. JL matchlen_single_match_nolit_encodeSnappyBlockAsm8B
  14467. matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B:
  14468. MOVQ (DI)(R9*1), R8
  14469. XORQ (BP)(R9*1), R8
  14470. TESTQ R8, R8
  14471. JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B
  14472. BSFQ R8, R8
  14473. SARQ $0x03, R8
  14474. LEAL (R9)(R8*1), R9
  14475. JMP match_nolit_end_encodeSnappyBlockAsm8B
  14476. matchlen_loop_match_nolit_encodeSnappyBlockAsm8B:
  14477. LEAL -8(SI), SI
  14478. LEAL 8(R9), R9
  14479. CMPL SI, $0x08
  14480. JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B
  14481. matchlen_single_match_nolit_encodeSnappyBlockAsm8B:
  14482. TESTL SI, SI
  14483. JZ match_nolit_end_encodeSnappyBlockAsm8B
  14484. matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B:
  14485. MOVB (DI)(R9*1), R8
  14486. CMPB (BP)(R9*1), R8
  14487. JNE match_nolit_end_encodeSnappyBlockAsm8B
  14488. LEAL 1(R9), R9
  14489. DECL SI
  14490. JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B
  14491. match_nolit_end_encodeSnappyBlockAsm8B:
  14492. ADDL R9, CX
  14493. MOVL 16(SP), BP
  14494. ADDL $0x04, R9
  14495. CMPL BP, $0x00010000
  14496. JL two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
  14497. four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8B:
  14498. CMPL R9, $0x40
  14499. JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B
  14500. MOVB $0xff, (AX)
  14501. MOVL BP, 1(AX)
  14502. LEAL -64(R9), R9
  14503. ADDQ $0x05, AX
  14504. CMPL R9, $0x04
  14505. JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B
  14506. JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8B
  14507. four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B:
  14508. TESTL R9, R9
  14509. JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
  14510. MOVB $0x03, BL
  14511. LEAL -4(BX)(R9*4), R9
  14512. MOVB R9, (AX)
  14513. MOVL BP, 1(AX)
  14514. ADDQ $0x05, AX
  14515. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
  14516. two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
  14517. CMPL R9, $0x40
  14518. JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
  14519. MOVB $0xee, (AX)
  14520. MOVW BP, 1(AX)
  14521. LEAL -60(R9), R9
  14522. ADDQ $0x03, AX
  14523. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
  14524. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
  14525. CMPL R9, $0x0c
  14526. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
  14527. CMPL BP, $0x00000800
  14528. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
  14529. MOVB $0x01, BL
  14530. LEAL -16(BX)(R9*4), R9
  14531. MOVB BP, 1(AX)
  14532. SHRL $0x08, BP
  14533. SHLL $0x05, BP
  14534. ORL BP, R9
  14535. MOVB R9, (AX)
  14536. ADDQ $0x02, AX
  14537. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
  14538. emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
  14539. MOVB $0x02, BL
  14540. LEAL -4(BX)(R9*4), R9
  14541. MOVB R9, (AX)
  14542. MOVW BP, 1(AX)
  14543. ADDQ $0x03, AX
  14544. match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
  14545. MOVL CX, 12(SP)
  14546. CMPL CX, 8(SP)
  14547. JGE emit_remainder_encodeSnappyBlockAsm8B
  14548. CMPQ AX, (SP)
  14549. JL match_nolit_dst_ok_encodeSnappyBlockAsm8B
  14550. MOVQ $0x00000000, ret+48(FP)
  14551. RET
  14552. match_nolit_dst_ok_encodeSnappyBlockAsm8B:
  14553. MOVQ -2(DX)(CX*1), SI
  14554. MOVQ $0x9e3779b1, BP
  14555. MOVQ SI, DI
  14556. SHRQ $0x10, SI
  14557. MOVQ SI, R8
  14558. SHLQ $0x20, DI
  14559. IMULQ BP, DI
  14560. SHRQ $0x38, DI
  14561. SHLQ $0x20, R8
  14562. IMULQ BP, R8
  14563. SHRQ $0x38, R8
  14564. LEAL -2(CX), R9
  14565. MOVL 24(SP)(R8*4), BP
  14566. MOVL R9, 24(SP)(DI*4)
  14567. MOVL CX, 24(SP)(R8*4)
  14568. CMPL (DX)(BP*1), SI
  14569. JEQ match_nolit_loop_encodeSnappyBlockAsm8B
  14570. INCL CX
  14571. JMP search_loop_encodeSnappyBlockAsm8B
  14572. emit_remainder_encodeSnappyBlockAsm8B:
  14573. MOVQ src_len+32(FP), CX
  14574. SUBL 12(SP), CX
  14575. LEAQ 4(AX)(CX*1), CX
  14576. CMPQ CX, (SP)
  14577. JL emit_remainder_ok_encodeSnappyBlockAsm8B
  14578. MOVQ $0x00000000, ret+48(FP)
  14579. RET
  14580. emit_remainder_ok_encodeSnappyBlockAsm8B:
  14581. MOVQ src_len+32(FP), CX
  14582. MOVL 12(SP), BX
  14583. CMPL BX, CX
  14584. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
  14585. MOVL CX, BP
  14586. MOVL CX, 12(SP)
  14587. LEAQ (DX)(BX*1), CX
  14588. SUBL BX, BP
  14589. MOVL BP, DX
  14590. SUBL $0x01, DX
  14591. JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
  14592. CMPL DX, $0x3c
  14593. JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B
  14594. CMPL DX, $0x00000100
  14595. JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B
  14596. CMPL DX, $0x00010000
  14597. JLT three_bytes_emit_remainder_encodeSnappyBlockAsm8B
  14598. CMPL DX, $0x01000000
  14599. JLT four_bytes_emit_remainder_encodeSnappyBlockAsm8B
  14600. MOVB $0xfc, (AX)
  14601. MOVL DX, 1(AX)
  14602. ADDQ $0x05, AX
  14603. JMP memmove_emit_remainder_encodeSnappyBlockAsm8B
  14604. four_bytes_emit_remainder_encodeSnappyBlockAsm8B:
  14605. MOVL DX, BX
  14606. SHRL $0x10, BX
  14607. MOVB $0xf8, (AX)
  14608. MOVW DX, 1(AX)
  14609. MOVB BL, 3(AX)
  14610. ADDQ $0x04, AX
  14611. JMP memmove_emit_remainder_encodeSnappyBlockAsm8B
  14612. three_bytes_emit_remainder_encodeSnappyBlockAsm8B:
  14613. MOVB $0xf4, (AX)
  14614. MOVW DX, 1(AX)
  14615. ADDQ $0x03, AX
  14616. JMP memmove_emit_remainder_encodeSnappyBlockAsm8B
  14617. two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
  14618. MOVB $0xf0, (AX)
  14619. MOVB DL, 1(AX)
  14620. ADDQ $0x02, AX
  14621. JMP memmove_emit_remainder_encodeSnappyBlockAsm8B
  14622. one_byte_emit_remainder_encodeSnappyBlockAsm8B:
  14623. SHLB $0x02, DL
  14624. MOVB DL, (AX)
  14625. ADDQ $0x01, AX
  14626. memmove_emit_remainder_encodeSnappyBlockAsm8B:
  14627. LEAQ (AX)(BP*1), DX
  14628. MOVL BP, BX
  14629. NOP
  14630. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_tail:
  14631. TESTQ BX, BX
  14632. JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14633. CMPQ BX, $0x02
  14634. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2
  14635. CMPQ BX, $0x04
  14636. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3
  14637. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4
  14638. CMPQ BX, $0x08
  14639. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_5through7
  14640. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8
  14641. CMPQ BX, $0x10
  14642. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_9through16
  14643. CMPQ BX, $0x20
  14644. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
  14645. CMPQ BX, $0x40
  14646. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
  14647. CMPQ BX, $0x80
  14648. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_65through128
  14649. CMPQ BX, $0x00000100
  14650. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_129through256
  14651. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_256through2048
  14652. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
  14653. MOVB (CX), BP
  14654. MOVB -1(CX)(BX*1), CL
  14655. MOVB BP, (AX)
  14656. MOVB CL, -1(AX)(BX*1)
  14657. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14658. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4:
  14659. MOVL (CX), BP
  14660. MOVL BP, (AX)
  14661. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14662. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
  14663. MOVW (CX), BP
  14664. MOVB 2(CX), CL
  14665. MOVW BP, (AX)
  14666. MOVB CL, 2(AX)
  14667. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14668. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_5through7:
  14669. MOVL (CX), BP
  14670. MOVL -4(CX)(BX*1), CX
  14671. MOVL BP, (AX)
  14672. MOVL CX, -4(AX)(BX*1)
  14673. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14674. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8:
  14675. MOVQ (CX), BP
  14676. MOVQ BP, (AX)
  14677. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14678. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_9through16:
  14679. MOVQ (CX), BP
  14680. MOVQ -8(CX)(BX*1), CX
  14681. MOVQ BP, (AX)
  14682. MOVQ CX, -8(AX)(BX*1)
  14683. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14684. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
  14685. MOVOU (CX), X0
  14686. MOVOU -16(CX)(BX*1), X1
  14687. MOVOU X0, (AX)
  14688. MOVOU X1, -16(AX)(BX*1)
  14689. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14690. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
  14691. MOVOU (CX), X0
  14692. MOVOU 16(CX), X1
  14693. MOVOU -32(CX)(BX*1), X2
  14694. MOVOU -16(CX)(BX*1), X3
  14695. MOVOU X0, (AX)
  14696. MOVOU X1, 16(AX)
  14697. MOVOU X2, -32(AX)(BX*1)
  14698. MOVOU X3, -16(AX)(BX*1)
  14699. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14700. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_65through128:
  14701. MOVOU (CX), X0
  14702. MOVOU 16(CX), X1
  14703. MOVOU 32(CX), X2
  14704. MOVOU 48(CX), X3
  14705. MOVOU -64(CX)(BX*1), X12
  14706. MOVOU -48(CX)(BX*1), X13
  14707. MOVOU -32(CX)(BX*1), X14
  14708. MOVOU -16(CX)(BX*1), X15
  14709. MOVOU X0, (AX)
  14710. MOVOU X1, 16(AX)
  14711. MOVOU X2, 32(AX)
  14712. MOVOU X3, 48(AX)
  14713. MOVOU X12, -64(AX)(BX*1)
  14714. MOVOU X13, -48(AX)(BX*1)
  14715. MOVOU X14, -32(AX)(BX*1)
  14716. MOVOU X15, -16(AX)(BX*1)
  14717. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14718. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_129through256:
  14719. MOVOU (CX), X0
  14720. MOVOU 16(CX), X1
  14721. MOVOU 32(CX), X2
  14722. MOVOU 48(CX), X3
  14723. MOVOU 64(CX), X4
  14724. MOVOU 80(CX), X5
  14725. MOVOU 96(CX), X6
  14726. MOVOU 112(CX), X7
  14727. MOVOU -128(CX)(BX*1), X8
  14728. MOVOU -112(CX)(BX*1), X9
  14729. MOVOU -96(CX)(BX*1), X10
  14730. MOVOU -80(CX)(BX*1), X11
  14731. MOVOU -64(CX)(BX*1), X12
  14732. MOVOU -48(CX)(BX*1), X13
  14733. MOVOU -32(CX)(BX*1), X14
  14734. MOVOU -16(CX)(BX*1), X15
  14735. MOVOU X0, (AX)
  14736. MOVOU X1, 16(AX)
  14737. MOVOU X2, 32(AX)
  14738. MOVOU X3, 48(AX)
  14739. MOVOU X4, 64(AX)
  14740. MOVOU X5, 80(AX)
  14741. MOVOU X6, 96(AX)
  14742. MOVOU X7, 112(AX)
  14743. MOVOU X8, -128(AX)(BX*1)
  14744. MOVOU X9, -112(AX)(BX*1)
  14745. MOVOU X10, -96(AX)(BX*1)
  14746. MOVOU X11, -80(AX)(BX*1)
  14747. MOVOU X12, -64(AX)(BX*1)
  14748. MOVOU X13, -48(AX)(BX*1)
  14749. MOVOU X14, -32(AX)(BX*1)
  14750. MOVOU X15, -16(AX)(BX*1)
  14751. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  14752. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_256through2048:
  14753. LEAQ -256(BX), BX
  14754. MOVOU (CX), X0
  14755. MOVOU 16(CX), X1
  14756. MOVOU 32(CX), X2
  14757. MOVOU 48(CX), X3
  14758. MOVOU 64(CX), X4
  14759. MOVOU 80(CX), X5
  14760. MOVOU 96(CX), X6
  14761. MOVOU 112(CX), X7
  14762. MOVOU 128(CX), X8
  14763. MOVOU 144(CX), X9
  14764. MOVOU 160(CX), X10
  14765. MOVOU 176(CX), X11
  14766. MOVOU 192(CX), X12
  14767. MOVOU 208(CX), X13
  14768. MOVOU 224(CX), X14
  14769. MOVOU 240(CX), X15
  14770. MOVOU X0, (AX)
  14771. MOVOU X1, 16(AX)
  14772. MOVOU X2, 32(AX)
  14773. MOVOU X3, 48(AX)
  14774. MOVOU X4, 64(AX)
  14775. MOVOU X5, 80(AX)
  14776. MOVOU X6, 96(AX)
  14777. MOVOU X7, 112(AX)
  14778. MOVOU X8, 128(AX)
  14779. MOVOU X9, 144(AX)
  14780. MOVOU X10, 160(AX)
  14781. MOVOU X11, 176(AX)
  14782. MOVOU X12, 192(AX)
  14783. MOVOU X13, 208(AX)
  14784. MOVOU X14, 224(AX)
  14785. MOVOU X15, 240(AX)
  14786. CMPQ BX, $0x00000100
  14787. LEAQ 256(CX), CX
  14788. LEAQ 256(AX), AX
  14789. JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_256through2048
  14790. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_tail
  14791. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
  14792. MOVQ DX, AX
  14793. emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
  14794. MOVQ dst_base+0(FP), CX
  14795. SUBQ CX, AX
  14796. MOVQ AX, ret+48(FP)
  14797. RET
  14798. // func encodeSnappyBlockAsmAvx(dst []byte, src []byte) int
  14799. // Requires: AVX, SSE2
  14800. TEXT ·encodeSnappyBlockAsmAvx(SB), $65560-56
  14801. MOVQ dst_base+0(FP), AX
  14802. MOVQ $0x00000200, CX
  14803. LEAQ 24(SP), DX
  14804. PXOR X0, X0
  14805. zero_loop_encodeSnappyBlockAsmAvx:
  14806. MOVOU X0, (DX)
  14807. MOVOU X0, 16(DX)
  14808. MOVOU X0, 32(DX)
  14809. MOVOU X0, 48(DX)
  14810. MOVOU X0, 64(DX)
  14811. MOVOU X0, 80(DX)
  14812. MOVOU X0, 96(DX)
  14813. MOVOU X0, 112(DX)
  14814. ADDQ $0x80, DX
  14815. DECQ CX
  14816. JNZ zero_loop_encodeSnappyBlockAsmAvx
  14817. MOVL $0x00000000, 12(SP)
  14818. MOVQ src_len+32(FP), CX
  14819. LEAQ -5(CX), DX
  14820. LEAQ -8(CX), BP
  14821. MOVL BP, 8(SP)
  14822. SHRQ $0x05, CX
  14823. SUBL CX, DX
  14824. LEAQ (AX)(DX*1), DX
  14825. MOVQ DX, (SP)
  14826. MOVL $0x00000001, CX
  14827. MOVL CX, 16(SP)
  14828. MOVQ src_base+24(FP), DX
  14829. search_loop_encodeSnappyBlockAsmAvx:
  14830. MOVQ (DX)(CX*1), SI
  14831. MOVL CX, BP
  14832. SUBL 12(SP), BP
  14833. SHRL $0x06, BP
  14834. LEAL 4(CX)(BP*1), BP
  14835. MOVL 8(SP), DI
  14836. CMPL BP, DI
  14837. JGT emit_remainder_encodeSnappyBlockAsmAvx
  14838. MOVL BP, 20(SP)
  14839. MOVQ $0x0000cf1bbcdcbf9b, R8
  14840. MOVQ SI, R9
  14841. MOVQ SI, R10
  14842. SHRQ $0x08, R10
  14843. SHLQ $0x10, R9
  14844. IMULQ R8, R9
  14845. SHRQ $0x32, R9
  14846. SHLQ $0x10, R10
  14847. IMULQ R8, R10
  14848. SHRQ $0x32, R10
  14849. MOVL 24(SP)(R9*4), BP
  14850. MOVL 24(SP)(R10*4), DI
  14851. MOVL CX, 24(SP)(R9*4)
  14852. LEAL 1(CX), R9
  14853. MOVL R9, 24(SP)(R10*4)
  14854. MOVQ SI, R9
  14855. SHRQ $0x10, R9
  14856. SHLQ $0x10, R9
  14857. IMULQ R8, R9
  14858. SHRQ $0x32, R9
  14859. MOVL CX, R8
  14860. SUBL 16(SP), R8
  14861. MOVL 1(DX)(R8*1), R10
  14862. MOVQ SI, R8
  14863. SHRQ $0x08, R8
  14864. CMPL R8, R10
  14865. JNE no_repeat_found_encodeSnappyBlockAsmAvx
  14866. LEAL 1(CX), SI
  14867. MOVL 12(SP), BP
  14868. MOVL SI, DI
  14869. SUBL 16(SP), DI
  14870. JZ repeat_extend_back_end_encodeSnappyBlockAsmAvx
  14871. repeat_extend_back_loop_encodeSnappyBlockAsmAvx:
  14872. CMPL SI, BP
  14873. JLE repeat_extend_back_end_encodeSnappyBlockAsmAvx
  14874. MOVB -1(DX)(DI*1), BL
  14875. MOVB -1(DX)(SI*1), R8
  14876. CMPB BL, R8
  14877. JNE repeat_extend_back_end_encodeSnappyBlockAsmAvx
  14878. LEAL -1(SI), SI
  14879. DECL DI
  14880. JNZ repeat_extend_back_loop_encodeSnappyBlockAsmAvx
  14881. repeat_extend_back_end_encodeSnappyBlockAsmAvx:
  14882. MOVL 12(SP), BP
  14883. CMPL BP, SI
  14884. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsmAvx
  14885. MOVL SI, DI
  14886. MOVL SI, 12(SP)
  14887. LEAQ (DX)(BP*1), R8
  14888. SUBL BP, DI
  14889. MOVL DI, BP
  14890. SUBL $0x01, BP
  14891. JC emit_literal_done_repeat_emit_encodeSnappyBlockAsmAvx
  14892. CMPL BP, $0x3c
  14893. JLT one_byte_repeat_emit_encodeSnappyBlockAsmAvx
  14894. CMPL BP, $0x00000100
  14895. JLT two_bytes_repeat_emit_encodeSnappyBlockAsmAvx
  14896. CMPL BP, $0x00010000
  14897. JLT three_bytes_repeat_emit_encodeSnappyBlockAsmAvx
  14898. CMPL BP, $0x01000000
  14899. JLT four_bytes_repeat_emit_encodeSnappyBlockAsmAvx
  14900. MOVB $0xfc, (AX)
  14901. MOVL BP, 1(AX)
  14902. ADDQ $0x05, AX
  14903. JMP memmove_repeat_emit_encodeSnappyBlockAsmAvx
  14904. four_bytes_repeat_emit_encodeSnappyBlockAsmAvx:
  14905. MOVL BP, R9
  14906. SHRL $0x10, R9
  14907. MOVB $0xf8, (AX)
  14908. MOVW BP, 1(AX)
  14909. MOVB R9, 3(AX)
  14910. ADDQ $0x04, AX
  14911. JMP memmove_repeat_emit_encodeSnappyBlockAsmAvx
  14912. three_bytes_repeat_emit_encodeSnappyBlockAsmAvx:
  14913. MOVB $0xf4, (AX)
  14914. MOVW BP, 1(AX)
  14915. ADDQ $0x03, AX
  14916. JMP memmove_repeat_emit_encodeSnappyBlockAsmAvx
  14917. two_bytes_repeat_emit_encodeSnappyBlockAsmAvx:
  14918. MOVB $0xf0, (AX)
  14919. MOVB BP, 1(AX)
  14920. ADDQ $0x02, AX
  14921. JMP memmove_repeat_emit_encodeSnappyBlockAsmAvx
  14922. one_byte_repeat_emit_encodeSnappyBlockAsmAvx:
  14923. SHLB $0x02, BP
  14924. MOVB BP, (AX)
  14925. ADDQ $0x01, AX
  14926. memmove_repeat_emit_encodeSnappyBlockAsmAvx:
  14927. LEAQ (AX)(DI*1), BP
  14928. NOP
  14929. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_tail:
  14930. TESTQ DI, DI
  14931. JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  14932. CMPQ DI, $0x02
  14933. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_1or2
  14934. CMPQ DI, $0x04
  14935. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_3
  14936. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_4
  14937. CMPQ DI, $0x08
  14938. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_5through7
  14939. JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_8
  14940. CMPQ DI, $0x10
  14941. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_9through16
  14942. CMPQ DI, $0x20
  14943. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_17through32
  14944. CMPQ DI, $0x40
  14945. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_33through64
  14946. CMPQ DI, $0x80
  14947. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_65through128
  14948. CMPQ DI, $0x00000100
  14949. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_129through256
  14950. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_avxUnaligned
  14951. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_1or2:
  14952. MOVB (R8), R9
  14953. MOVB -1(R8)(DI*1), R10
  14954. MOVB R9, (AX)
  14955. MOVB R10, -1(AX)(DI*1)
  14956. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  14957. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_4:
  14958. MOVL (R8), R9
  14959. MOVL R9, (AX)
  14960. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  14961. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_3:
  14962. MOVW (R8), R9
  14963. MOVB 2(R8), R10
  14964. MOVW R9, (AX)
  14965. MOVB R10, 2(AX)
  14966. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  14967. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_5through7:
  14968. MOVL (R8), R9
  14969. MOVL -4(R8)(DI*1), R10
  14970. MOVL R9, (AX)
  14971. MOVL R10, -4(AX)(DI*1)
  14972. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  14973. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_8:
  14974. MOVQ (R8), R9
  14975. MOVQ R9, (AX)
  14976. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  14977. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_9through16:
  14978. MOVQ (R8), R9
  14979. MOVQ -8(R8)(DI*1), R10
  14980. MOVQ R9, (AX)
  14981. MOVQ R10, -8(AX)(DI*1)
  14982. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  14983. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_17through32:
  14984. MOVOU (R8), X0
  14985. MOVOU -16(R8)(DI*1), X1
  14986. MOVOU X0, (AX)
  14987. MOVOU X1, -16(AX)(DI*1)
  14988. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  14989. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_33through64:
  14990. MOVOU (R8), X0
  14991. MOVOU 16(R8), X1
  14992. MOVOU -32(R8)(DI*1), X2
  14993. MOVOU -16(R8)(DI*1), X3
  14994. MOVOU X0, (AX)
  14995. MOVOU X1, 16(AX)
  14996. MOVOU X2, -32(AX)(DI*1)
  14997. MOVOU X3, -16(AX)(DI*1)
  14998. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  14999. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_65through128:
  15000. MOVOU (R8), X0
  15001. MOVOU 16(R8), X1
  15002. MOVOU 32(R8), X2
  15003. MOVOU 48(R8), X3
  15004. MOVOU -64(R8)(DI*1), X12
  15005. MOVOU -48(R8)(DI*1), X13
  15006. MOVOU -32(R8)(DI*1), X14
  15007. MOVOU -16(R8)(DI*1), X15
  15008. MOVOU X0, (AX)
  15009. MOVOU X1, 16(AX)
  15010. MOVOU X2, 32(AX)
  15011. MOVOU X3, 48(AX)
  15012. MOVOU X12, -64(AX)(DI*1)
  15013. MOVOU X13, -48(AX)(DI*1)
  15014. MOVOU X14, -32(AX)(DI*1)
  15015. MOVOU X15, -16(AX)(DI*1)
  15016. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  15017. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_129through256:
  15018. MOVOU (R8), X0
  15019. MOVOU 16(R8), X1
  15020. MOVOU 32(R8), X2
  15021. MOVOU 48(R8), X3
  15022. MOVOU 64(R8), X4
  15023. MOVOU 80(R8), X5
  15024. MOVOU 96(R8), X6
  15025. MOVOU 112(R8), X7
  15026. MOVOU -128(R8)(DI*1), X8
  15027. MOVOU -112(R8)(DI*1), X9
  15028. MOVOU -96(R8)(DI*1), X10
  15029. MOVOU -80(R8)(DI*1), X11
  15030. MOVOU -64(R8)(DI*1), X12
  15031. MOVOU -48(R8)(DI*1), X13
  15032. MOVOU -32(R8)(DI*1), X14
  15033. MOVOU -16(R8)(DI*1), X15
  15034. MOVOU X0, (AX)
  15035. MOVOU X1, 16(AX)
  15036. MOVOU X2, 32(AX)
  15037. MOVOU X3, 48(AX)
  15038. MOVOU X4, 64(AX)
  15039. MOVOU X5, 80(AX)
  15040. MOVOU X6, 96(AX)
  15041. MOVOU X7, 112(AX)
  15042. MOVOU X8, -128(AX)(DI*1)
  15043. MOVOU X9, -112(AX)(DI*1)
  15044. MOVOU X10, -96(AX)(DI*1)
  15045. MOVOU X11, -80(AX)(DI*1)
  15046. MOVOU X12, -64(AX)(DI*1)
  15047. MOVOU X13, -48(AX)(DI*1)
  15048. MOVOU X14, -32(AX)(DI*1)
  15049. MOVOU X15, -16(AX)(DI*1)
  15050. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx
  15051. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_256through2048:
  15052. LEAQ -256(DI), DI
  15053. MOVOU (R8), X0
  15054. MOVOU 16(R8), X1
  15055. MOVOU 32(R8), X2
  15056. MOVOU 48(R8), X3
  15057. MOVOU 64(R8), X4
  15058. MOVOU 80(R8), X5
  15059. MOVOU 96(R8), X6
  15060. MOVOU 112(R8), X7
  15061. MOVOU 128(R8), X8
  15062. MOVOU 144(R8), X9
  15063. MOVOU 160(R8), X10
  15064. MOVOU 176(R8), X11
  15065. MOVOU 192(R8), X12
  15066. MOVOU 208(R8), X13
  15067. MOVOU 224(R8), X14
  15068. MOVOU 240(R8), X15
  15069. MOVOU X0, (AX)
  15070. MOVOU X1, 16(AX)
  15071. MOVOU X2, 32(AX)
  15072. MOVOU X3, 48(AX)
  15073. MOVOU X4, 64(AX)
  15074. MOVOU X5, 80(AX)
  15075. MOVOU X6, 96(AX)
  15076. MOVOU X7, 112(AX)
  15077. MOVOU X8, 128(AX)
  15078. MOVOU X9, 144(AX)
  15079. MOVOU X10, 160(AX)
  15080. MOVOU X11, 176(AX)
  15081. MOVOU X12, 192(AX)
  15082. MOVOU X13, 208(AX)
  15083. MOVOU X14, 224(AX)
  15084. MOVOU X15, 240(AX)
  15085. CMPQ DI, $0x00000100
  15086. LEAQ 256(R8), R8
  15087. LEAQ 256(AX), AX
  15088. JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_256through2048
  15089. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_tail
  15090. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_avxUnaligned:
  15091. LEAQ (R8)(DI*1), R10
  15092. MOVQ AX, R12
  15093. MOVOU -128(R10), X5
  15094. MOVOU -112(R10), X6
  15095. MOVQ $0x00000080, R9
  15096. ANDQ $0xffffffe0, AX
  15097. ADDQ $0x20, AX
  15098. MOVOU -96(R10), X7
  15099. MOVOU -80(R10), X8
  15100. MOVQ AX, R11
  15101. SUBQ R12, R11
  15102. MOVOU -64(R10), X9
  15103. MOVOU -48(R10), X10
  15104. SUBQ R11, DI
  15105. MOVOU -32(R10), X11
  15106. MOVOU -16(R10), X12
  15107. VMOVDQU (R8), Y4
  15108. ADDQ R11, R8
  15109. SUBQ R9, DI
  15110. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop:
  15111. VMOVDQU (R8), Y0
  15112. VMOVDQU 32(R8), Y1
  15113. VMOVDQU 64(R8), Y2
  15114. VMOVDQU 96(R8), Y3
  15115. ADDQ R9, R8
  15116. VMOVDQA Y0, (AX)
  15117. VMOVDQA Y1, 32(AX)
  15118. VMOVDQA Y2, 64(AX)
  15119. VMOVDQA Y3, 96(AX)
  15120. ADDQ R9, AX
  15121. SUBQ R9, DI
  15122. JA emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop
  15123. ADDQ R9, DI
  15124. ADDQ AX, DI
  15125. VMOVDQU Y4, (R12)
  15126. VZEROUPPER
  15127. MOVOU X5, -128(DI)
  15128. MOVOU X6, -112(DI)
  15129. MOVOU X7, -96(DI)
  15130. MOVOU X8, -80(DI)
  15131. MOVOU X9, -64(DI)
  15132. MOVOU X10, -48(DI)
  15133. MOVOU X11, -32(DI)
  15134. MOVOU X12, -16(DI)
  15135. memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx:
  15136. MOVQ BP, AX
  15137. emit_literal_done_repeat_emit_encodeSnappyBlockAsmAvx:
  15138. ADDL $0x05, CX
  15139. MOVL CX, BP
  15140. SUBL 16(SP), BP
  15141. MOVQ src_len+32(FP), DI
  15142. SUBL CX, DI
  15143. LEAQ (DX)(CX*1), R8
  15144. LEAQ (DX)(BP*1), BP
  15145. XORL R10, R10
  15146. CMPL DI, $0x08
  15147. JL matchlen_single_repeat_extend
  15148. matchlen_loopback_repeat_extend:
  15149. MOVQ (R8)(R10*1), R9
  15150. XORQ (BP)(R10*1), R9
  15151. TESTQ R9, R9
  15152. JZ matchlen_loop_repeat_extend
  15153. BSFQ R9, R9
  15154. SARQ $0x03, R9
  15155. LEAL (R10)(R9*1), R10
  15156. JMP repeat_extend_forward_end_encodeSnappyBlockAsmAvx
  15157. matchlen_loop_repeat_extend:
  15158. LEAL -8(DI), DI
  15159. LEAL 8(R10), R10
  15160. CMPL DI, $0x08
  15161. JGE matchlen_loopback_repeat_extend
  15162. matchlen_single_repeat_extend:
  15163. TESTL DI, DI
  15164. JZ repeat_extend_forward_end_encodeSnappyBlockAsmAvx
  15165. matchlen_single_loopback_repeat_extend:
  15166. MOVB (R8)(R10*1), R9
  15167. CMPB (BP)(R10*1), R9
  15168. JNE repeat_extend_forward_end_encodeSnappyBlockAsmAvx
  15169. LEAL 1(R10), R10
  15170. DECL DI
  15171. JNZ matchlen_single_loopback_repeat_extend
  15172. repeat_extend_forward_end_encodeSnappyBlockAsmAvx:
  15173. ADDL R10, CX
  15174. MOVL CX, BP
  15175. SUBL SI, BP
  15176. MOVL 16(SP), SI
  15177. CMPL SI, $0x00010000
  15178. JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsmAvx
  15179. four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsmAvx:
  15180. CMPL BP, $0x40
  15181. JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsmAvx
  15182. MOVB $0xff, (AX)
  15183. MOVL SI, 1(AX)
  15184. LEAL -64(BP), BP
  15185. ADDQ $0x05, AX
  15186. CMPL BP, $0x04
  15187. JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsmAvx
  15188. JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsmAvx
  15189. four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsmAvx:
  15190. TESTL BP, BP
  15191. JZ repeat_end_emit_encodeSnappyBlockAsmAvx
  15192. MOVB $0x03, BL
  15193. LEAL -4(BX)(BP*4), BP
  15194. MOVB BP, (AX)
  15195. MOVL SI, 1(AX)
  15196. ADDQ $0x05, AX
  15197. JMP repeat_end_emit_encodeSnappyBlockAsmAvx
  15198. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsmAvx:
  15199. CMPL BP, $0x40
  15200. JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsmAvx
  15201. MOVB $0xee, (AX)
  15202. MOVW SI, 1(AX)
  15203. LEAL -60(BP), BP
  15204. ADDQ $0x03, AX
  15205. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsmAvx
  15206. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsmAvx:
  15207. CMPL BP, $0x0c
  15208. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsmAvx
  15209. CMPL SI, $0x00000800
  15210. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsmAvx
  15211. MOVB $0x01, BL
  15212. LEAL -16(BX)(BP*4), BP
  15213. MOVB SI, 1(AX)
  15214. SHRL $0x08, SI
  15215. SHLL $0x05, SI
  15216. ORL SI, BP
  15217. MOVB BP, (AX)
  15218. ADDQ $0x02, AX
  15219. JMP repeat_end_emit_encodeSnappyBlockAsmAvx
  15220. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsmAvx:
  15221. MOVB $0x02, BL
  15222. LEAL -4(BX)(BP*4), BP
  15223. MOVB BP, (AX)
  15224. MOVW SI, 1(AX)
  15225. ADDQ $0x03, AX
  15226. repeat_end_emit_encodeSnappyBlockAsmAvx:
  15227. MOVL CX, 12(SP)
  15228. CMPL CX, 8(SP)
  15229. JGE emit_remainder_encodeSnappyBlockAsmAvx
  15230. JMP search_loop_encodeSnappyBlockAsmAvx
  15231. no_repeat_found_encodeSnappyBlockAsmAvx:
  15232. CMPL (DX)(BP*1), SI
  15233. JEQ candidate_match_encodeSnappyBlockAsmAvx
  15234. SHRQ $0x08, SI
  15235. MOVL 24(SP)(R9*4), BP
  15236. LEAL 2(CX), R8
  15237. CMPL (DX)(DI*1), SI
  15238. JEQ candidate2_match_encodeSnappyBlockAsmAvx
  15239. MOVL R8, 24(SP)(R9*4)
  15240. SHRQ $0x08, SI
  15241. CMPL (DX)(BP*1), SI
  15242. JEQ candidate3_match_encodeSnappyBlockAsmAvx
  15243. MOVL 20(SP), CX
  15244. JMP search_loop_encodeSnappyBlockAsmAvx
  15245. candidate3_match_encodeSnappyBlockAsmAvx:
  15246. ADDL $0x02, CX
  15247. JMP candidate_match_encodeSnappyBlockAsmAvx
  15248. candidate2_match_encodeSnappyBlockAsmAvx:
  15249. MOVL R8, 24(SP)(R9*4)
  15250. INCL CX
  15251. MOVL DI, BP
  15252. candidate_match_encodeSnappyBlockAsmAvx:
  15253. MOVL 12(SP), SI
  15254. TESTL BP, BP
  15255. JZ match_extend_back_end_encodeSnappyBlockAsmAvx
  15256. match_extend_back_loop_encodeSnappyBlockAsmAvx:
  15257. CMPL CX, SI
  15258. JLE match_extend_back_end_encodeSnappyBlockAsmAvx
  15259. MOVB -1(DX)(BP*1), BL
  15260. MOVB -1(DX)(CX*1), DI
  15261. CMPB BL, DI
  15262. JNE match_extend_back_end_encodeSnappyBlockAsmAvx
  15263. LEAL -1(CX), CX
  15264. DECL BP
  15265. JZ match_extend_back_end_encodeSnappyBlockAsmAvx
  15266. JMP match_extend_back_loop_encodeSnappyBlockAsmAvx
  15267. match_extend_back_end_encodeSnappyBlockAsmAvx:
  15268. MOVL CX, SI
  15269. SUBL 12(SP), SI
  15270. LEAQ 4(AX)(SI*1), SI
  15271. CMPQ SI, (SP)
  15272. JL match_dst_size_check_encodeSnappyBlockAsmAvx
  15273. MOVQ $0x00000000, ret+48(FP)
  15274. RET
  15275. match_dst_size_check_encodeSnappyBlockAsmAvx:
  15276. MOVL CX, SI
  15277. MOVL 12(SP), DI
  15278. CMPL DI, SI
  15279. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsmAvx
  15280. MOVL SI, R8
  15281. MOVL SI, 12(SP)
  15282. LEAQ (DX)(DI*1), SI
  15283. SUBL DI, R8
  15284. MOVL R8, DI
  15285. SUBL $0x01, DI
  15286. JC emit_literal_done_match_emit_encodeSnappyBlockAsmAvx
  15287. CMPL DI, $0x3c
  15288. JLT one_byte_match_emit_encodeSnappyBlockAsmAvx
  15289. CMPL DI, $0x00000100
  15290. JLT two_bytes_match_emit_encodeSnappyBlockAsmAvx
  15291. CMPL DI, $0x00010000
  15292. JLT three_bytes_match_emit_encodeSnappyBlockAsmAvx
  15293. CMPL DI, $0x01000000
  15294. JLT four_bytes_match_emit_encodeSnappyBlockAsmAvx
  15295. MOVB $0xfc, (AX)
  15296. MOVL DI, 1(AX)
  15297. ADDQ $0x05, AX
  15298. JMP memmove_match_emit_encodeSnappyBlockAsmAvx
  15299. four_bytes_match_emit_encodeSnappyBlockAsmAvx:
  15300. MOVL DI, R9
  15301. SHRL $0x10, R9
  15302. MOVB $0xf8, (AX)
  15303. MOVW DI, 1(AX)
  15304. MOVB R9, 3(AX)
  15305. ADDQ $0x04, AX
  15306. JMP memmove_match_emit_encodeSnappyBlockAsmAvx
  15307. three_bytes_match_emit_encodeSnappyBlockAsmAvx:
  15308. MOVB $0xf4, (AX)
  15309. MOVW DI, 1(AX)
  15310. ADDQ $0x03, AX
  15311. JMP memmove_match_emit_encodeSnappyBlockAsmAvx
  15312. two_bytes_match_emit_encodeSnappyBlockAsmAvx:
  15313. MOVB $0xf0, (AX)
  15314. MOVB DI, 1(AX)
  15315. ADDQ $0x02, AX
  15316. JMP memmove_match_emit_encodeSnappyBlockAsmAvx
  15317. one_byte_match_emit_encodeSnappyBlockAsmAvx:
  15318. SHLB $0x02, DI
  15319. MOVB DI, (AX)
  15320. ADDQ $0x01, AX
  15321. memmove_match_emit_encodeSnappyBlockAsmAvx:
  15322. LEAQ (AX)(R8*1), DI
  15323. NOP
  15324. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_tail:
  15325. TESTQ R8, R8
  15326. JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15327. CMPQ R8, $0x02
  15328. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_1or2
  15329. CMPQ R8, $0x04
  15330. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_3
  15331. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_4
  15332. CMPQ R8, $0x08
  15333. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_5through7
  15334. JE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_8
  15335. CMPQ R8, $0x10
  15336. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_9through16
  15337. CMPQ R8, $0x20
  15338. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_17through32
  15339. CMPQ R8, $0x40
  15340. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_33through64
  15341. CMPQ R8, $0x80
  15342. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_65through128
  15343. CMPQ R8, $0x00000100
  15344. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_129through256
  15345. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_avxUnaligned
  15346. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_1or2:
  15347. MOVB (SI), R9
  15348. MOVB -1(SI)(R8*1), R10
  15349. MOVB R9, (AX)
  15350. MOVB R10, -1(AX)(R8*1)
  15351. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15352. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_4:
  15353. MOVL (SI), R9
  15354. MOVL R9, (AX)
  15355. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15356. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_3:
  15357. MOVW (SI), R9
  15358. MOVB 2(SI), R10
  15359. MOVW R9, (AX)
  15360. MOVB R10, 2(AX)
  15361. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15362. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_5through7:
  15363. MOVL (SI), R9
  15364. MOVL -4(SI)(R8*1), R10
  15365. MOVL R9, (AX)
  15366. MOVL R10, -4(AX)(R8*1)
  15367. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15368. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_8:
  15369. MOVQ (SI), R9
  15370. MOVQ R9, (AX)
  15371. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15372. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_9through16:
  15373. MOVQ (SI), R9
  15374. MOVQ -8(SI)(R8*1), R10
  15375. MOVQ R9, (AX)
  15376. MOVQ R10, -8(AX)(R8*1)
  15377. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15378. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_17through32:
  15379. MOVOU (SI), X0
  15380. MOVOU -16(SI)(R8*1), X1
  15381. MOVOU X0, (AX)
  15382. MOVOU X1, -16(AX)(R8*1)
  15383. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15384. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_33through64:
  15385. MOVOU (SI), X0
  15386. MOVOU 16(SI), X1
  15387. MOVOU -32(SI)(R8*1), X2
  15388. MOVOU -16(SI)(R8*1), X3
  15389. MOVOU X0, (AX)
  15390. MOVOU X1, 16(AX)
  15391. MOVOU X2, -32(AX)(R8*1)
  15392. MOVOU X3, -16(AX)(R8*1)
  15393. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15394. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_65through128:
  15395. MOVOU (SI), X0
  15396. MOVOU 16(SI), X1
  15397. MOVOU 32(SI), X2
  15398. MOVOU 48(SI), X3
  15399. MOVOU -64(SI)(R8*1), X12
  15400. MOVOU -48(SI)(R8*1), X13
  15401. MOVOU -32(SI)(R8*1), X14
  15402. MOVOU -16(SI)(R8*1), X15
  15403. MOVOU X0, (AX)
  15404. MOVOU X1, 16(AX)
  15405. MOVOU X2, 32(AX)
  15406. MOVOU X3, 48(AX)
  15407. MOVOU X12, -64(AX)(R8*1)
  15408. MOVOU X13, -48(AX)(R8*1)
  15409. MOVOU X14, -32(AX)(R8*1)
  15410. MOVOU X15, -16(AX)(R8*1)
  15411. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15412. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_129through256:
  15413. MOVOU (SI), X0
  15414. MOVOU 16(SI), X1
  15415. MOVOU 32(SI), X2
  15416. MOVOU 48(SI), X3
  15417. MOVOU 64(SI), X4
  15418. MOVOU 80(SI), X5
  15419. MOVOU 96(SI), X6
  15420. MOVOU 112(SI), X7
  15421. MOVOU -128(SI)(R8*1), X8
  15422. MOVOU -112(SI)(R8*1), X9
  15423. MOVOU -96(SI)(R8*1), X10
  15424. MOVOU -80(SI)(R8*1), X11
  15425. MOVOU -64(SI)(R8*1), X12
  15426. MOVOU -48(SI)(R8*1), X13
  15427. MOVOU -32(SI)(R8*1), X14
  15428. MOVOU -16(SI)(R8*1), X15
  15429. MOVOU X0, (AX)
  15430. MOVOU X1, 16(AX)
  15431. MOVOU X2, 32(AX)
  15432. MOVOU X3, 48(AX)
  15433. MOVOU X4, 64(AX)
  15434. MOVOU X5, 80(AX)
  15435. MOVOU X6, 96(AX)
  15436. MOVOU X7, 112(AX)
  15437. MOVOU X8, -128(AX)(R8*1)
  15438. MOVOU X9, -112(AX)(R8*1)
  15439. MOVOU X10, -96(AX)(R8*1)
  15440. MOVOU X11, -80(AX)(R8*1)
  15441. MOVOU X12, -64(AX)(R8*1)
  15442. MOVOU X13, -48(AX)(R8*1)
  15443. MOVOU X14, -32(AX)(R8*1)
  15444. MOVOU X15, -16(AX)(R8*1)
  15445. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx
  15446. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_256through2048:
  15447. LEAQ -256(R8), R8
  15448. MOVOU (SI), X0
  15449. MOVOU 16(SI), X1
  15450. MOVOU 32(SI), X2
  15451. MOVOU 48(SI), X3
  15452. MOVOU 64(SI), X4
  15453. MOVOU 80(SI), X5
  15454. MOVOU 96(SI), X6
  15455. MOVOU 112(SI), X7
  15456. MOVOU 128(SI), X8
  15457. MOVOU 144(SI), X9
  15458. MOVOU 160(SI), X10
  15459. MOVOU 176(SI), X11
  15460. MOVOU 192(SI), X12
  15461. MOVOU 208(SI), X13
  15462. MOVOU 224(SI), X14
  15463. MOVOU 240(SI), X15
  15464. MOVOU X0, (AX)
  15465. MOVOU X1, 16(AX)
  15466. MOVOU X2, 32(AX)
  15467. MOVOU X3, 48(AX)
  15468. MOVOU X4, 64(AX)
  15469. MOVOU X5, 80(AX)
  15470. MOVOU X6, 96(AX)
  15471. MOVOU X7, 112(AX)
  15472. MOVOU X8, 128(AX)
  15473. MOVOU X9, 144(AX)
  15474. MOVOU X10, 160(AX)
  15475. MOVOU X11, 176(AX)
  15476. MOVOU X12, 192(AX)
  15477. MOVOU X13, 208(AX)
  15478. MOVOU X14, 224(AX)
  15479. MOVOU X15, 240(AX)
  15480. CMPQ R8, $0x00000100
  15481. LEAQ 256(SI), SI
  15482. LEAQ 256(AX), AX
  15483. JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_256through2048
  15484. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_tail
  15485. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_avxUnaligned:
  15486. LEAQ (SI)(R8*1), R10
  15487. MOVQ AX, R12
  15488. MOVOU -128(R10), X5
  15489. MOVOU -112(R10), X6
  15490. MOVQ $0x00000080, R9
  15491. ANDQ $0xffffffe0, AX
  15492. ADDQ $0x20, AX
  15493. MOVOU -96(R10), X7
  15494. MOVOU -80(R10), X8
  15495. MOVQ AX, R11
  15496. SUBQ R12, R11
  15497. MOVOU -64(R10), X9
  15498. MOVOU -48(R10), X10
  15499. SUBQ R11, R8
  15500. MOVOU -32(R10), X11
  15501. MOVOU -16(R10), X12
  15502. VMOVDQU (SI), Y4
  15503. ADDQ R11, SI
  15504. SUBQ R9, R8
  15505. emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop:
  15506. VMOVDQU (SI), Y0
  15507. VMOVDQU 32(SI), Y1
  15508. VMOVDQU 64(SI), Y2
  15509. VMOVDQU 96(SI), Y3
  15510. ADDQ R9, SI
  15511. VMOVDQA Y0, (AX)
  15512. VMOVDQA Y1, 32(AX)
  15513. VMOVDQA Y2, 64(AX)
  15514. VMOVDQA Y3, 96(AX)
  15515. ADDQ R9, AX
  15516. SUBQ R9, R8
  15517. JA emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop
  15518. ADDQ R9, R8
  15519. ADDQ AX, R8
  15520. VMOVDQU Y4, (R12)
  15521. VZEROUPPER
  15522. MOVOU X5, -128(R8)
  15523. MOVOU X6, -112(R8)
  15524. MOVOU X7, -96(R8)
  15525. MOVOU X8, -80(R8)
  15526. MOVOU X9, -64(R8)
  15527. MOVOU X10, -48(R8)
  15528. MOVOU X11, -32(R8)
  15529. MOVOU X12, -16(R8)
  15530. memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx:
  15531. MOVQ DI, AX
  15532. emit_literal_done_match_emit_encodeSnappyBlockAsmAvx:
  15533. match_nolit_loop_encodeSnappyBlockAsmAvx:
  15534. MOVL CX, SI
  15535. SUBL BP, SI
  15536. MOVL SI, 16(SP)
  15537. ADDL $0x04, CX
  15538. ADDL $0x04, BP
  15539. MOVQ src_len+32(FP), SI
  15540. SUBL CX, SI
  15541. LEAQ (DX)(CX*1), DI
  15542. LEAQ (DX)(BP*1), BP
  15543. XORL R9, R9
  15544. CMPL SI, $0x08
  15545. JL matchlen_single_match_nolit_encodeSnappyBlockAsmAvx
  15546. matchlen_loopback_match_nolit_encodeSnappyBlockAsmAvx:
  15547. MOVQ (DI)(R9*1), R8
  15548. XORQ (BP)(R9*1), R8
  15549. TESTQ R8, R8
  15550. JZ matchlen_loop_match_nolit_encodeSnappyBlockAsmAvx
  15551. BSFQ R8, R8
  15552. SARQ $0x03, R8
  15553. LEAL (R9)(R8*1), R9
  15554. JMP match_nolit_end_encodeSnappyBlockAsmAvx
  15555. matchlen_loop_match_nolit_encodeSnappyBlockAsmAvx:
  15556. LEAL -8(SI), SI
  15557. LEAL 8(R9), R9
  15558. CMPL SI, $0x08
  15559. JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsmAvx
  15560. matchlen_single_match_nolit_encodeSnappyBlockAsmAvx:
  15561. TESTL SI, SI
  15562. JZ match_nolit_end_encodeSnappyBlockAsmAvx
  15563. matchlen_single_loopback_match_nolit_encodeSnappyBlockAsmAvx:
  15564. MOVB (DI)(R9*1), R8
  15565. CMPB (BP)(R9*1), R8
  15566. JNE match_nolit_end_encodeSnappyBlockAsmAvx
  15567. LEAL 1(R9), R9
  15568. DECL SI
  15569. JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsmAvx
  15570. match_nolit_end_encodeSnappyBlockAsmAvx:
  15571. ADDL R9, CX
  15572. MOVL 16(SP), BP
  15573. ADDL $0x04, R9
  15574. CMPL BP, $0x00010000
  15575. JL two_byte_offset_match_nolit_encodeSnappyBlockAsmAvx
  15576. four_bytes_loop_back_match_nolit_encodeSnappyBlockAsmAvx:
  15577. CMPL R9, $0x40
  15578. JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsmAvx
  15579. MOVB $0xff, (AX)
  15580. MOVL BP, 1(AX)
  15581. LEAL -64(R9), R9
  15582. ADDQ $0x05, AX
  15583. CMPL R9, $0x04
  15584. JL four_bytes_remain_match_nolit_encodeSnappyBlockAsmAvx
  15585. JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsmAvx
  15586. four_bytes_remain_match_nolit_encodeSnappyBlockAsmAvx:
  15587. TESTL R9, R9
  15588. JZ match_nolit_emitcopy_end_encodeSnappyBlockAsmAvx
  15589. MOVB $0x03, BL
  15590. LEAL -4(BX)(R9*4), R9
  15591. MOVB R9, (AX)
  15592. MOVL BP, 1(AX)
  15593. ADDQ $0x05, AX
  15594. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsmAvx
  15595. two_byte_offset_match_nolit_encodeSnappyBlockAsmAvx:
  15596. CMPL R9, $0x40
  15597. JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsmAvx
  15598. MOVB $0xee, (AX)
  15599. MOVW BP, 1(AX)
  15600. LEAL -60(R9), R9
  15601. ADDQ $0x03, AX
  15602. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsmAvx
  15603. two_byte_offset_short_match_nolit_encodeSnappyBlockAsmAvx:
  15604. CMPL R9, $0x0c
  15605. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsmAvx
  15606. CMPL BP, $0x00000800
  15607. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsmAvx
  15608. MOVB $0x01, BL
  15609. LEAL -16(BX)(R9*4), R9
  15610. MOVB BP, 1(AX)
  15611. SHRL $0x08, BP
  15612. SHLL $0x05, BP
  15613. ORL BP, R9
  15614. MOVB R9, (AX)
  15615. ADDQ $0x02, AX
  15616. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsmAvx
  15617. emit_copy_three_match_nolit_encodeSnappyBlockAsmAvx:
  15618. MOVB $0x02, BL
  15619. LEAL -4(BX)(R9*4), R9
  15620. MOVB R9, (AX)
  15621. MOVW BP, 1(AX)
  15622. ADDQ $0x03, AX
  15623. match_nolit_emitcopy_end_encodeSnappyBlockAsmAvx:
  15624. MOVL CX, 12(SP)
  15625. CMPL CX, 8(SP)
  15626. JGE emit_remainder_encodeSnappyBlockAsmAvx
  15627. CMPQ AX, (SP)
  15628. JL match_nolit_dst_ok_encodeSnappyBlockAsmAvx
  15629. MOVQ $0x00000000, ret+48(FP)
  15630. RET
  15631. match_nolit_dst_ok_encodeSnappyBlockAsmAvx:
  15632. MOVQ -2(DX)(CX*1), SI
  15633. MOVQ $0x0000cf1bbcdcbf9b, BP
  15634. MOVQ SI, DI
  15635. SHRQ $0x10, SI
  15636. MOVQ SI, R8
  15637. SHLQ $0x10, DI
  15638. IMULQ BP, DI
  15639. SHRQ $0x32, DI
  15640. SHLQ $0x10, R8
  15641. IMULQ BP, R8
  15642. SHRQ $0x32, R8
  15643. LEAL -2(CX), R9
  15644. MOVL 24(SP)(R8*4), BP
  15645. MOVL R9, 24(SP)(DI*4)
  15646. MOVL CX, 24(SP)(R8*4)
  15647. CMPL (DX)(BP*1), SI
  15648. JEQ match_nolit_loop_encodeSnappyBlockAsmAvx
  15649. INCL CX
  15650. JMP search_loop_encodeSnappyBlockAsmAvx
  15651. emit_remainder_encodeSnappyBlockAsmAvx:
  15652. MOVQ src_len+32(FP), CX
  15653. SUBL 12(SP), CX
  15654. LEAQ 4(AX)(CX*1), CX
  15655. CMPQ CX, (SP)
  15656. JL emit_remainder_ok_encodeSnappyBlockAsmAvx
  15657. MOVQ $0x00000000, ret+48(FP)
  15658. RET
  15659. emit_remainder_ok_encodeSnappyBlockAsmAvx:
  15660. MOVQ src_len+32(FP), CX
  15661. MOVL 12(SP), BX
  15662. CMPL BX, CX
  15663. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsmAvx
  15664. MOVL CX, BP
  15665. MOVL CX, 12(SP)
  15666. LEAQ (DX)(BX*1), CX
  15667. SUBL BX, BP
  15668. MOVL BP, DX
  15669. SUBL $0x01, DX
  15670. JC emit_literal_done_emit_remainder_encodeSnappyBlockAsmAvx
  15671. CMPL DX, $0x3c
  15672. JLT one_byte_emit_remainder_encodeSnappyBlockAsmAvx
  15673. CMPL DX, $0x00000100
  15674. JLT two_bytes_emit_remainder_encodeSnappyBlockAsmAvx
  15675. CMPL DX, $0x00010000
  15676. JLT three_bytes_emit_remainder_encodeSnappyBlockAsmAvx
  15677. CMPL DX, $0x01000000
  15678. JLT four_bytes_emit_remainder_encodeSnappyBlockAsmAvx
  15679. MOVB $0xfc, (AX)
  15680. MOVL DX, 1(AX)
  15681. ADDQ $0x05, AX
  15682. JMP memmove_emit_remainder_encodeSnappyBlockAsmAvx
  15683. four_bytes_emit_remainder_encodeSnappyBlockAsmAvx:
  15684. MOVL DX, BX
  15685. SHRL $0x10, BX
  15686. MOVB $0xf8, (AX)
  15687. MOVW DX, 1(AX)
  15688. MOVB BL, 3(AX)
  15689. ADDQ $0x04, AX
  15690. JMP memmove_emit_remainder_encodeSnappyBlockAsmAvx
  15691. three_bytes_emit_remainder_encodeSnappyBlockAsmAvx:
  15692. MOVB $0xf4, (AX)
  15693. MOVW DX, 1(AX)
  15694. ADDQ $0x03, AX
  15695. JMP memmove_emit_remainder_encodeSnappyBlockAsmAvx
  15696. two_bytes_emit_remainder_encodeSnappyBlockAsmAvx:
  15697. MOVB $0xf0, (AX)
  15698. MOVB DL, 1(AX)
  15699. ADDQ $0x02, AX
  15700. JMP memmove_emit_remainder_encodeSnappyBlockAsmAvx
  15701. one_byte_emit_remainder_encodeSnappyBlockAsmAvx:
  15702. SHLB $0x02, DL
  15703. MOVB DL, (AX)
  15704. ADDQ $0x01, AX
  15705. memmove_emit_remainder_encodeSnappyBlockAsmAvx:
  15706. LEAQ (AX)(BP*1), DX
  15707. MOVL BP, BX
  15708. NOP
  15709. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_tail:
  15710. TESTQ BX, BX
  15711. JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15712. CMPQ BX, $0x02
  15713. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_1or2
  15714. CMPQ BX, $0x04
  15715. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_3
  15716. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_4
  15717. CMPQ BX, $0x08
  15718. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_5through7
  15719. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_8
  15720. CMPQ BX, $0x10
  15721. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_9through16
  15722. CMPQ BX, $0x20
  15723. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_17through32
  15724. CMPQ BX, $0x40
  15725. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_33through64
  15726. CMPQ BX, $0x80
  15727. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_65through128
  15728. CMPQ BX, $0x00000100
  15729. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_129through256
  15730. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_avxUnaligned
  15731. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_1or2:
  15732. MOVB (CX), BP
  15733. MOVB -1(CX)(BX*1), SI
  15734. MOVB BP, (AX)
  15735. MOVB SI, -1(AX)(BX*1)
  15736. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15737. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_4:
  15738. MOVL (CX), BP
  15739. MOVL BP, (AX)
  15740. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15741. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_3:
  15742. MOVW (CX), BP
  15743. MOVB 2(CX), SI
  15744. MOVW BP, (AX)
  15745. MOVB SI, 2(AX)
  15746. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15747. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_5through7:
  15748. MOVL (CX), BP
  15749. MOVL -4(CX)(BX*1), SI
  15750. MOVL BP, (AX)
  15751. MOVL SI, -4(AX)(BX*1)
  15752. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15753. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_8:
  15754. MOVQ (CX), BP
  15755. MOVQ BP, (AX)
  15756. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15757. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_9through16:
  15758. MOVQ (CX), BP
  15759. MOVQ -8(CX)(BX*1), SI
  15760. MOVQ BP, (AX)
  15761. MOVQ SI, -8(AX)(BX*1)
  15762. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15763. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_17through32:
  15764. MOVOU (CX), X0
  15765. MOVOU -16(CX)(BX*1), X1
  15766. MOVOU X0, (AX)
  15767. MOVOU X1, -16(AX)(BX*1)
  15768. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15769. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_33through64:
  15770. MOVOU (CX), X0
  15771. MOVOU 16(CX), X1
  15772. MOVOU -32(CX)(BX*1), X2
  15773. MOVOU -16(CX)(BX*1), X3
  15774. MOVOU X0, (AX)
  15775. MOVOU X1, 16(AX)
  15776. MOVOU X2, -32(AX)(BX*1)
  15777. MOVOU X3, -16(AX)(BX*1)
  15778. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15779. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_65through128:
  15780. MOVOU (CX), X0
  15781. MOVOU 16(CX), X1
  15782. MOVOU 32(CX), X2
  15783. MOVOU 48(CX), X3
  15784. MOVOU -64(CX)(BX*1), X12
  15785. MOVOU -48(CX)(BX*1), X13
  15786. MOVOU -32(CX)(BX*1), X14
  15787. MOVOU -16(CX)(BX*1), X15
  15788. MOVOU X0, (AX)
  15789. MOVOU X1, 16(AX)
  15790. MOVOU X2, 32(AX)
  15791. MOVOU X3, 48(AX)
  15792. MOVOU X12, -64(AX)(BX*1)
  15793. MOVOU X13, -48(AX)(BX*1)
  15794. MOVOU X14, -32(AX)(BX*1)
  15795. MOVOU X15, -16(AX)(BX*1)
  15796. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15797. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_129through256:
  15798. MOVOU (CX), X0
  15799. MOVOU 16(CX), X1
  15800. MOVOU 32(CX), X2
  15801. MOVOU 48(CX), X3
  15802. MOVOU 64(CX), X4
  15803. MOVOU 80(CX), X5
  15804. MOVOU 96(CX), X6
  15805. MOVOU 112(CX), X7
  15806. MOVOU -128(CX)(BX*1), X8
  15807. MOVOU -112(CX)(BX*1), X9
  15808. MOVOU -96(CX)(BX*1), X10
  15809. MOVOU -80(CX)(BX*1), X11
  15810. MOVOU -64(CX)(BX*1), X12
  15811. MOVOU -48(CX)(BX*1), X13
  15812. MOVOU -32(CX)(BX*1), X14
  15813. MOVOU -16(CX)(BX*1), X15
  15814. MOVOU X0, (AX)
  15815. MOVOU X1, 16(AX)
  15816. MOVOU X2, 32(AX)
  15817. MOVOU X3, 48(AX)
  15818. MOVOU X4, 64(AX)
  15819. MOVOU X5, 80(AX)
  15820. MOVOU X6, 96(AX)
  15821. MOVOU X7, 112(AX)
  15822. MOVOU X8, -128(AX)(BX*1)
  15823. MOVOU X9, -112(AX)(BX*1)
  15824. MOVOU X10, -96(AX)(BX*1)
  15825. MOVOU X11, -80(AX)(BX*1)
  15826. MOVOU X12, -64(AX)(BX*1)
  15827. MOVOU X13, -48(AX)(BX*1)
  15828. MOVOU X14, -32(AX)(BX*1)
  15829. MOVOU X15, -16(AX)(BX*1)
  15830. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx
  15831. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_256through2048:
  15832. LEAQ -256(BX), BX
  15833. MOVOU (CX), X0
  15834. MOVOU 16(CX), X1
  15835. MOVOU 32(CX), X2
  15836. MOVOU 48(CX), X3
  15837. MOVOU 64(CX), X4
  15838. MOVOU 80(CX), X5
  15839. MOVOU 96(CX), X6
  15840. MOVOU 112(CX), X7
  15841. MOVOU 128(CX), X8
  15842. MOVOU 144(CX), X9
  15843. MOVOU 160(CX), X10
  15844. MOVOU 176(CX), X11
  15845. MOVOU 192(CX), X12
  15846. MOVOU 208(CX), X13
  15847. MOVOU 224(CX), X14
  15848. MOVOU 240(CX), X15
  15849. MOVOU X0, (AX)
  15850. MOVOU X1, 16(AX)
  15851. MOVOU X2, 32(AX)
  15852. MOVOU X3, 48(AX)
  15853. MOVOU X4, 64(AX)
  15854. MOVOU X5, 80(AX)
  15855. MOVOU X6, 96(AX)
  15856. MOVOU X7, 112(AX)
  15857. MOVOU X8, 128(AX)
  15858. MOVOU X9, 144(AX)
  15859. MOVOU X10, 160(AX)
  15860. MOVOU X11, 176(AX)
  15861. MOVOU X12, 192(AX)
  15862. MOVOU X13, 208(AX)
  15863. MOVOU X14, 224(AX)
  15864. MOVOU X15, 240(AX)
  15865. CMPQ BX, $0x00000100
  15866. LEAQ 256(CX), CX
  15867. LEAQ 256(AX), AX
  15868. JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_256through2048
  15869. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_tail
  15870. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_avxUnaligned:
  15871. LEAQ (CX)(BX*1), SI
  15872. MOVQ AX, R8
  15873. MOVOU -128(SI), X5
  15874. MOVOU -112(SI), X6
  15875. MOVQ $0x00000080, BP
  15876. ANDQ $0xffffffe0, AX
  15877. ADDQ $0x20, AX
  15878. MOVOU -96(SI), X7
  15879. MOVOU -80(SI), X8
  15880. MOVQ AX, DI
  15881. SUBQ R8, DI
  15882. MOVOU -64(SI), X9
  15883. MOVOU -48(SI), X10
  15884. SUBQ DI, BX
  15885. MOVOU -32(SI), X11
  15886. MOVOU -16(SI), X12
  15887. VMOVDQU (CX), Y4
  15888. ADDQ DI, CX
  15889. SUBQ BP, BX
  15890. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop:
  15891. VMOVDQU (CX), Y0
  15892. VMOVDQU 32(CX), Y1
  15893. VMOVDQU 64(CX), Y2
  15894. VMOVDQU 96(CX), Y3
  15895. ADDQ BP, CX
  15896. VMOVDQA Y0, (AX)
  15897. VMOVDQA Y1, 32(AX)
  15898. VMOVDQA Y2, 64(AX)
  15899. VMOVDQA Y3, 96(AX)
  15900. ADDQ BP, AX
  15901. SUBQ BP, BX
  15902. JA emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop
  15903. ADDQ BP, BX
  15904. ADDQ AX, BX
  15905. VMOVDQU Y4, (R8)
  15906. VZEROUPPER
  15907. MOVOU X5, -128(BX)
  15908. MOVOU X6, -112(BX)
  15909. MOVOU X7, -96(BX)
  15910. MOVOU X8, -80(BX)
  15911. MOVOU X9, -64(BX)
  15912. MOVOU X10, -48(BX)
  15913. MOVOU X11, -32(BX)
  15914. MOVOU X12, -16(BX)
  15915. memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx:
  15916. MOVQ DX, AX
  15917. emit_literal_done_emit_remainder_encodeSnappyBlockAsmAvx:
  15918. MOVQ dst_base+0(FP), CX
  15919. SUBQ CX, AX
  15920. MOVQ AX, ret+48(FP)
  15921. RET
  15922. // func encodeSnappyBlockAsm12BAvx(dst []byte, src []byte) int
  15923. // Requires: AVX, SSE2
  15924. TEXT ·encodeSnappyBlockAsm12BAvx(SB), $16408-56
  15925. MOVQ dst_base+0(FP), AX
  15926. MOVQ $0x00000080, CX
  15927. LEAQ 24(SP), DX
  15928. PXOR X0, X0
  15929. zero_loop_encodeSnappyBlockAsm12BAvx:
  15930. MOVOU X0, (DX)
  15931. MOVOU X0, 16(DX)
  15932. MOVOU X0, 32(DX)
  15933. MOVOU X0, 48(DX)
  15934. MOVOU X0, 64(DX)
  15935. MOVOU X0, 80(DX)
  15936. MOVOU X0, 96(DX)
  15937. MOVOU X0, 112(DX)
  15938. ADDQ $0x80, DX
  15939. DECQ CX
  15940. JNZ zero_loop_encodeSnappyBlockAsm12BAvx
  15941. MOVL $0x00000000, 12(SP)
  15942. MOVQ src_len+32(FP), CX
  15943. LEAQ -5(CX), DX
  15944. LEAQ -8(CX), BP
  15945. MOVL BP, 8(SP)
  15946. SHRQ $0x05, CX
  15947. SUBL CX, DX
  15948. LEAQ (AX)(DX*1), DX
  15949. MOVQ DX, (SP)
  15950. MOVL $0x00000001, CX
  15951. MOVL CX, 16(SP)
  15952. MOVQ src_base+24(FP), DX
  15953. search_loop_encodeSnappyBlockAsm12BAvx:
  15954. MOVQ (DX)(CX*1), SI
  15955. MOVL CX, BP
  15956. SUBL 12(SP), BP
  15957. SHRL $0x05, BP
  15958. LEAL 4(CX)(BP*1), BP
  15959. MOVL 8(SP), DI
  15960. CMPL BP, DI
  15961. JGT emit_remainder_encodeSnappyBlockAsm12BAvx
  15962. MOVL BP, 20(SP)
  15963. MOVQ $0x000000cf1bbcdcbb, R8
  15964. MOVQ SI, R9
  15965. MOVQ SI, R10
  15966. SHRQ $0x08, R10
  15967. SHLQ $0x18, R9
  15968. IMULQ R8, R9
  15969. SHRQ $0x34, R9
  15970. SHLQ $0x18, R10
  15971. IMULQ R8, R10
  15972. SHRQ $0x34, R10
  15973. MOVL 24(SP)(R9*4), BP
  15974. MOVL 24(SP)(R10*4), DI
  15975. MOVL CX, 24(SP)(R9*4)
  15976. LEAL 1(CX), R9
  15977. MOVL R9, 24(SP)(R10*4)
  15978. MOVQ SI, R9
  15979. SHRQ $0x10, R9
  15980. SHLQ $0x18, R9
  15981. IMULQ R8, R9
  15982. SHRQ $0x34, R9
  15983. MOVL CX, R8
  15984. SUBL 16(SP), R8
  15985. MOVL 1(DX)(R8*1), R10
  15986. MOVQ SI, R8
  15987. SHRQ $0x08, R8
  15988. CMPL R8, R10
  15989. JNE no_repeat_found_encodeSnappyBlockAsm12BAvx
  15990. LEAL 1(CX), SI
  15991. MOVL 12(SP), BP
  15992. MOVL SI, DI
  15993. SUBL 16(SP), DI
  15994. JZ repeat_extend_back_end_encodeSnappyBlockAsm12BAvx
  15995. repeat_extend_back_loop_encodeSnappyBlockAsm12BAvx:
  15996. CMPL SI, BP
  15997. JLE repeat_extend_back_end_encodeSnappyBlockAsm12BAvx
  15998. MOVB -1(DX)(DI*1), BL
  15999. MOVB -1(DX)(SI*1), R8
  16000. CMPB BL, R8
  16001. JNE repeat_extend_back_end_encodeSnappyBlockAsm12BAvx
  16002. LEAL -1(SI), SI
  16003. DECL DI
  16004. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12BAvx
  16005. repeat_extend_back_end_encodeSnappyBlockAsm12BAvx:
  16006. MOVL 12(SP), BP
  16007. CMPL BP, SI
  16008. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12BAvx
  16009. MOVL SI, DI
  16010. MOVL SI, 12(SP)
  16011. LEAQ (DX)(BP*1), R8
  16012. SUBL BP, DI
  16013. MOVL DI, BP
  16014. SUBL $0x01, BP
  16015. JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm12BAvx
  16016. CMPL BP, $0x3c
  16017. JLT one_byte_repeat_emit_encodeSnappyBlockAsm12BAvx
  16018. CMPL BP, $0x00000100
  16019. JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx
  16020. CMPL BP, $0x00010000
  16021. JLT three_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx
  16022. CMPL BP, $0x01000000
  16023. JLT four_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx
  16024. MOVB $0xfc, (AX)
  16025. MOVL BP, 1(AX)
  16026. ADDQ $0x05, AX
  16027. JMP memmove_repeat_emit_encodeSnappyBlockAsm12BAvx
  16028. four_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx:
  16029. MOVL BP, R9
  16030. SHRL $0x10, R9
  16031. MOVB $0xf8, (AX)
  16032. MOVW BP, 1(AX)
  16033. MOVB R9, 3(AX)
  16034. ADDQ $0x04, AX
  16035. JMP memmove_repeat_emit_encodeSnappyBlockAsm12BAvx
  16036. three_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx:
  16037. MOVB $0xf4, (AX)
  16038. MOVW BP, 1(AX)
  16039. ADDQ $0x03, AX
  16040. JMP memmove_repeat_emit_encodeSnappyBlockAsm12BAvx
  16041. two_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx:
  16042. MOVB $0xf0, (AX)
  16043. MOVB BP, 1(AX)
  16044. ADDQ $0x02, AX
  16045. JMP memmove_repeat_emit_encodeSnappyBlockAsm12BAvx
  16046. one_byte_repeat_emit_encodeSnappyBlockAsm12BAvx:
  16047. SHLB $0x02, BP
  16048. MOVB BP, (AX)
  16049. ADDQ $0x01, AX
  16050. memmove_repeat_emit_encodeSnappyBlockAsm12BAvx:
  16051. LEAQ (AX)(DI*1), BP
  16052. NOP
  16053. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_tail:
  16054. TESTQ DI, DI
  16055. JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16056. CMPQ DI, $0x02
  16057. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_1or2
  16058. CMPQ DI, $0x04
  16059. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_3
  16060. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_4
  16061. CMPQ DI, $0x08
  16062. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_5through7
  16063. JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_8
  16064. CMPQ DI, $0x10
  16065. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_9through16
  16066. CMPQ DI, $0x20
  16067. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_17through32
  16068. CMPQ DI, $0x40
  16069. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_33through64
  16070. CMPQ DI, $0x80
  16071. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_65through128
  16072. CMPQ DI, $0x00000100
  16073. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_129through256
  16074. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned
  16075. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_1or2:
  16076. MOVB (R8), R9
  16077. MOVB -1(R8)(DI*1), R10
  16078. MOVB R9, (AX)
  16079. MOVB R10, -1(AX)(DI*1)
  16080. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16081. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_4:
  16082. MOVL (R8), R9
  16083. MOVL R9, (AX)
  16084. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16085. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_3:
  16086. MOVW (R8), R9
  16087. MOVB 2(R8), R10
  16088. MOVW R9, (AX)
  16089. MOVB R10, 2(AX)
  16090. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16091. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_5through7:
  16092. MOVL (R8), R9
  16093. MOVL -4(R8)(DI*1), R10
  16094. MOVL R9, (AX)
  16095. MOVL R10, -4(AX)(DI*1)
  16096. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16097. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_8:
  16098. MOVQ (R8), R9
  16099. MOVQ R9, (AX)
  16100. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16101. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_9through16:
  16102. MOVQ (R8), R9
  16103. MOVQ -8(R8)(DI*1), R10
  16104. MOVQ R9, (AX)
  16105. MOVQ R10, -8(AX)(DI*1)
  16106. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16107. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_17through32:
  16108. MOVOU (R8), X0
  16109. MOVOU -16(R8)(DI*1), X1
  16110. MOVOU X0, (AX)
  16111. MOVOU X1, -16(AX)(DI*1)
  16112. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16113. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_33through64:
  16114. MOVOU (R8), X0
  16115. MOVOU 16(R8), X1
  16116. MOVOU -32(R8)(DI*1), X2
  16117. MOVOU -16(R8)(DI*1), X3
  16118. MOVOU X0, (AX)
  16119. MOVOU X1, 16(AX)
  16120. MOVOU X2, -32(AX)(DI*1)
  16121. MOVOU X3, -16(AX)(DI*1)
  16122. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16123. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_65through128:
  16124. MOVOU (R8), X0
  16125. MOVOU 16(R8), X1
  16126. MOVOU 32(R8), X2
  16127. MOVOU 48(R8), X3
  16128. MOVOU -64(R8)(DI*1), X12
  16129. MOVOU -48(R8)(DI*1), X13
  16130. MOVOU -32(R8)(DI*1), X14
  16131. MOVOU -16(R8)(DI*1), X15
  16132. MOVOU X0, (AX)
  16133. MOVOU X1, 16(AX)
  16134. MOVOU X2, 32(AX)
  16135. MOVOU X3, 48(AX)
  16136. MOVOU X12, -64(AX)(DI*1)
  16137. MOVOU X13, -48(AX)(DI*1)
  16138. MOVOU X14, -32(AX)(DI*1)
  16139. MOVOU X15, -16(AX)(DI*1)
  16140. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16141. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_129through256:
  16142. MOVOU (R8), X0
  16143. MOVOU 16(R8), X1
  16144. MOVOU 32(R8), X2
  16145. MOVOU 48(R8), X3
  16146. MOVOU 64(R8), X4
  16147. MOVOU 80(R8), X5
  16148. MOVOU 96(R8), X6
  16149. MOVOU 112(R8), X7
  16150. MOVOU -128(R8)(DI*1), X8
  16151. MOVOU -112(R8)(DI*1), X9
  16152. MOVOU -96(R8)(DI*1), X10
  16153. MOVOU -80(R8)(DI*1), X11
  16154. MOVOU -64(R8)(DI*1), X12
  16155. MOVOU -48(R8)(DI*1), X13
  16156. MOVOU -32(R8)(DI*1), X14
  16157. MOVOU -16(R8)(DI*1), X15
  16158. MOVOU X0, (AX)
  16159. MOVOU X1, 16(AX)
  16160. MOVOU X2, 32(AX)
  16161. MOVOU X3, 48(AX)
  16162. MOVOU X4, 64(AX)
  16163. MOVOU X5, 80(AX)
  16164. MOVOU X6, 96(AX)
  16165. MOVOU X7, 112(AX)
  16166. MOVOU X8, -128(AX)(DI*1)
  16167. MOVOU X9, -112(AX)(DI*1)
  16168. MOVOU X10, -96(AX)(DI*1)
  16169. MOVOU X11, -80(AX)(DI*1)
  16170. MOVOU X12, -64(AX)(DI*1)
  16171. MOVOU X13, -48(AX)(DI*1)
  16172. MOVOU X14, -32(AX)(DI*1)
  16173. MOVOU X15, -16(AX)(DI*1)
  16174. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx
  16175. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048:
  16176. LEAQ -256(DI), DI
  16177. MOVOU (R8), X0
  16178. MOVOU 16(R8), X1
  16179. MOVOU 32(R8), X2
  16180. MOVOU 48(R8), X3
  16181. MOVOU 64(R8), X4
  16182. MOVOU 80(R8), X5
  16183. MOVOU 96(R8), X6
  16184. MOVOU 112(R8), X7
  16185. MOVOU 128(R8), X8
  16186. MOVOU 144(R8), X9
  16187. MOVOU 160(R8), X10
  16188. MOVOU 176(R8), X11
  16189. MOVOU 192(R8), X12
  16190. MOVOU 208(R8), X13
  16191. MOVOU 224(R8), X14
  16192. MOVOU 240(R8), X15
  16193. MOVOU X0, (AX)
  16194. MOVOU X1, 16(AX)
  16195. MOVOU X2, 32(AX)
  16196. MOVOU X3, 48(AX)
  16197. MOVOU X4, 64(AX)
  16198. MOVOU X5, 80(AX)
  16199. MOVOU X6, 96(AX)
  16200. MOVOU X7, 112(AX)
  16201. MOVOU X8, 128(AX)
  16202. MOVOU X9, 144(AX)
  16203. MOVOU X10, 160(AX)
  16204. MOVOU X11, 176(AX)
  16205. MOVOU X12, 192(AX)
  16206. MOVOU X13, 208(AX)
  16207. MOVOU X14, 224(AX)
  16208. MOVOU X15, 240(AX)
  16209. CMPQ DI, $0x00000100
  16210. LEAQ 256(R8), R8
  16211. LEAQ 256(AX), AX
  16212. JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048
  16213. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_tail
  16214. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned:
  16215. LEAQ (R8)(DI*1), R10
  16216. MOVQ AX, R12
  16217. MOVOU -128(R10), X5
  16218. MOVOU -112(R10), X6
  16219. MOVQ $0x00000080, R9
  16220. ANDQ $0xffffffe0, AX
  16221. ADDQ $0x20, AX
  16222. MOVOU -96(R10), X7
  16223. MOVOU -80(R10), X8
  16224. MOVQ AX, R11
  16225. SUBQ R12, R11
  16226. MOVOU -64(R10), X9
  16227. MOVOU -48(R10), X10
  16228. SUBQ R11, DI
  16229. MOVOU -32(R10), X11
  16230. MOVOU -16(R10), X12
  16231. VMOVDQU (R8), Y4
  16232. ADDQ R11, R8
  16233. SUBQ R9, DI
  16234. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop:
  16235. VMOVDQU (R8), Y0
  16236. VMOVDQU 32(R8), Y1
  16237. VMOVDQU 64(R8), Y2
  16238. VMOVDQU 96(R8), Y3
  16239. ADDQ R9, R8
  16240. VMOVDQA Y0, (AX)
  16241. VMOVDQA Y1, 32(AX)
  16242. VMOVDQA Y2, 64(AX)
  16243. VMOVDQA Y3, 96(AX)
  16244. ADDQ R9, AX
  16245. SUBQ R9, DI
  16246. JA emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop
  16247. ADDQ R9, DI
  16248. ADDQ AX, DI
  16249. VMOVDQU Y4, (R12)
  16250. VZEROUPPER
  16251. MOVOU X5, -128(DI)
  16252. MOVOU X6, -112(DI)
  16253. MOVOU X7, -96(DI)
  16254. MOVOU X8, -80(DI)
  16255. MOVOU X9, -64(DI)
  16256. MOVOU X10, -48(DI)
  16257. MOVOU X11, -32(DI)
  16258. MOVOU X12, -16(DI)
  16259. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx:
  16260. MOVQ BP, AX
  16261. emit_literal_done_repeat_emit_encodeSnappyBlockAsm12BAvx:
  16262. ADDL $0x05, CX
  16263. MOVL CX, BP
  16264. SUBL 16(SP), BP
  16265. MOVQ src_len+32(FP), DI
  16266. SUBL CX, DI
  16267. LEAQ (DX)(CX*1), R8
  16268. LEAQ (DX)(BP*1), BP
  16269. XORL R10, R10
  16270. CMPL DI, $0x08
  16271. JL matchlen_single_repeat_extend
  16272. matchlen_loopback_repeat_extend:
  16273. MOVQ (R8)(R10*1), R9
  16274. XORQ (BP)(R10*1), R9
  16275. TESTQ R9, R9
  16276. JZ matchlen_loop_repeat_extend
  16277. BSFQ R9, R9
  16278. SARQ $0x03, R9
  16279. LEAL (R10)(R9*1), R10
  16280. JMP repeat_extend_forward_end_encodeSnappyBlockAsm12BAvx
  16281. matchlen_loop_repeat_extend:
  16282. LEAL -8(DI), DI
  16283. LEAL 8(R10), R10
  16284. CMPL DI, $0x08
  16285. JGE matchlen_loopback_repeat_extend
  16286. matchlen_single_repeat_extend:
  16287. TESTL DI, DI
  16288. JZ repeat_extend_forward_end_encodeSnappyBlockAsm12BAvx
  16289. matchlen_single_loopback_repeat_extend:
  16290. MOVB (R8)(R10*1), R9
  16291. CMPB (BP)(R10*1), R9
  16292. JNE repeat_extend_forward_end_encodeSnappyBlockAsm12BAvx
  16293. LEAL 1(R10), R10
  16294. DECL DI
  16295. JNZ matchlen_single_loopback_repeat_extend
  16296. repeat_extend_forward_end_encodeSnappyBlockAsm12BAvx:
  16297. ADDL R10, CX
  16298. MOVL CX, BP
  16299. SUBL SI, BP
  16300. MOVL 16(SP), SI
  16301. CMPL SI, $0x00010000
  16302. JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12BAvx
  16303. four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12BAvx:
  16304. CMPL BP, $0x40
  16305. JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12BAvx
  16306. MOVB $0xff, (AX)
  16307. MOVL SI, 1(AX)
  16308. LEAL -64(BP), BP
  16309. ADDQ $0x05, AX
  16310. CMPL BP, $0x04
  16311. JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12BAvx
  16312. JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12BAvx
  16313. four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12BAvx:
  16314. TESTL BP, BP
  16315. JZ repeat_end_emit_encodeSnappyBlockAsm12BAvx
  16316. MOVB $0x03, BL
  16317. LEAL -4(BX)(BP*4), BP
  16318. MOVB BP, (AX)
  16319. MOVL SI, 1(AX)
  16320. ADDQ $0x05, AX
  16321. JMP repeat_end_emit_encodeSnappyBlockAsm12BAvx
  16322. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12BAvx:
  16323. CMPL BP, $0x40
  16324. JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12BAvx
  16325. MOVB $0xee, (AX)
  16326. MOVW SI, 1(AX)
  16327. LEAL -60(BP), BP
  16328. ADDQ $0x03, AX
  16329. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12BAvx
  16330. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12BAvx:
  16331. CMPL BP, $0x0c
  16332. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12BAvx
  16333. CMPL SI, $0x00000800
  16334. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12BAvx
  16335. MOVB $0x01, BL
  16336. LEAL -16(BX)(BP*4), BP
  16337. MOVB SI, 1(AX)
  16338. SHRL $0x08, SI
  16339. SHLL $0x05, SI
  16340. ORL SI, BP
  16341. MOVB BP, (AX)
  16342. ADDQ $0x02, AX
  16343. JMP repeat_end_emit_encodeSnappyBlockAsm12BAvx
  16344. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12BAvx:
  16345. MOVB $0x02, BL
  16346. LEAL -4(BX)(BP*4), BP
  16347. MOVB BP, (AX)
  16348. MOVW SI, 1(AX)
  16349. ADDQ $0x03, AX
  16350. repeat_end_emit_encodeSnappyBlockAsm12BAvx:
  16351. MOVL CX, 12(SP)
  16352. CMPL CX, 8(SP)
  16353. JGE emit_remainder_encodeSnappyBlockAsm12BAvx
  16354. JMP search_loop_encodeSnappyBlockAsm12BAvx
  16355. no_repeat_found_encodeSnappyBlockAsm12BAvx:
  16356. CMPL (DX)(BP*1), SI
  16357. JEQ candidate_match_encodeSnappyBlockAsm12BAvx
  16358. SHRQ $0x08, SI
  16359. MOVL 24(SP)(R9*4), BP
  16360. LEAL 2(CX), R8
  16361. CMPL (DX)(DI*1), SI
  16362. JEQ candidate2_match_encodeSnappyBlockAsm12BAvx
  16363. MOVL R8, 24(SP)(R9*4)
  16364. SHRQ $0x08, SI
  16365. CMPL (DX)(BP*1), SI
  16366. JEQ candidate3_match_encodeSnappyBlockAsm12BAvx
  16367. MOVL 20(SP), CX
  16368. JMP search_loop_encodeSnappyBlockAsm12BAvx
  16369. candidate3_match_encodeSnappyBlockAsm12BAvx:
  16370. ADDL $0x02, CX
  16371. JMP candidate_match_encodeSnappyBlockAsm12BAvx
  16372. candidate2_match_encodeSnappyBlockAsm12BAvx:
  16373. MOVL R8, 24(SP)(R9*4)
  16374. INCL CX
  16375. MOVL DI, BP
  16376. candidate_match_encodeSnappyBlockAsm12BAvx:
  16377. MOVL 12(SP), SI
  16378. TESTL BP, BP
  16379. JZ match_extend_back_end_encodeSnappyBlockAsm12BAvx
  16380. match_extend_back_loop_encodeSnappyBlockAsm12BAvx:
  16381. CMPL CX, SI
  16382. JLE match_extend_back_end_encodeSnappyBlockAsm12BAvx
  16383. MOVB -1(DX)(BP*1), BL
  16384. MOVB -1(DX)(CX*1), DI
  16385. CMPB BL, DI
  16386. JNE match_extend_back_end_encodeSnappyBlockAsm12BAvx
  16387. LEAL -1(CX), CX
  16388. DECL BP
  16389. JZ match_extend_back_end_encodeSnappyBlockAsm12BAvx
  16390. JMP match_extend_back_loop_encodeSnappyBlockAsm12BAvx
  16391. match_extend_back_end_encodeSnappyBlockAsm12BAvx:
  16392. MOVL CX, SI
  16393. SUBL 12(SP), SI
  16394. LEAQ 4(AX)(SI*1), SI
  16395. CMPQ SI, (SP)
  16396. JL match_dst_size_check_encodeSnappyBlockAsm12BAvx
  16397. MOVQ $0x00000000, ret+48(FP)
  16398. RET
  16399. match_dst_size_check_encodeSnappyBlockAsm12BAvx:
  16400. MOVL CX, SI
  16401. MOVL 12(SP), DI
  16402. CMPL DI, SI
  16403. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12BAvx
  16404. MOVL SI, R8
  16405. MOVL SI, 12(SP)
  16406. LEAQ (DX)(DI*1), SI
  16407. SUBL DI, R8
  16408. MOVL R8, DI
  16409. SUBL $0x01, DI
  16410. JC emit_literal_done_match_emit_encodeSnappyBlockAsm12BAvx
  16411. CMPL DI, $0x3c
  16412. JLT one_byte_match_emit_encodeSnappyBlockAsm12BAvx
  16413. CMPL DI, $0x00000100
  16414. JLT two_bytes_match_emit_encodeSnappyBlockAsm12BAvx
  16415. CMPL DI, $0x00010000
  16416. JLT three_bytes_match_emit_encodeSnappyBlockAsm12BAvx
  16417. CMPL DI, $0x01000000
  16418. JLT four_bytes_match_emit_encodeSnappyBlockAsm12BAvx
  16419. MOVB $0xfc, (AX)
  16420. MOVL DI, 1(AX)
  16421. ADDQ $0x05, AX
  16422. JMP memmove_match_emit_encodeSnappyBlockAsm12BAvx
  16423. four_bytes_match_emit_encodeSnappyBlockAsm12BAvx:
  16424. MOVL DI, R9
  16425. SHRL $0x10, R9
  16426. MOVB $0xf8, (AX)
  16427. MOVW DI, 1(AX)
  16428. MOVB R9, 3(AX)
  16429. ADDQ $0x04, AX
  16430. JMP memmove_match_emit_encodeSnappyBlockAsm12BAvx
  16431. three_bytes_match_emit_encodeSnappyBlockAsm12BAvx:
  16432. MOVB $0xf4, (AX)
  16433. MOVW DI, 1(AX)
  16434. ADDQ $0x03, AX
  16435. JMP memmove_match_emit_encodeSnappyBlockAsm12BAvx
  16436. two_bytes_match_emit_encodeSnappyBlockAsm12BAvx:
  16437. MOVB $0xf0, (AX)
  16438. MOVB DI, 1(AX)
  16439. ADDQ $0x02, AX
  16440. JMP memmove_match_emit_encodeSnappyBlockAsm12BAvx
  16441. one_byte_match_emit_encodeSnappyBlockAsm12BAvx:
  16442. SHLB $0x02, DI
  16443. MOVB DI, (AX)
  16444. ADDQ $0x01, AX
  16445. memmove_match_emit_encodeSnappyBlockAsm12BAvx:
  16446. LEAQ (AX)(R8*1), DI
  16447. NOP
  16448. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_tail:
  16449. TESTQ R8, R8
  16450. JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16451. CMPQ R8, $0x02
  16452. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_1or2
  16453. CMPQ R8, $0x04
  16454. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_3
  16455. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_4
  16456. CMPQ R8, $0x08
  16457. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_5through7
  16458. JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_8
  16459. CMPQ R8, $0x10
  16460. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_9through16
  16461. CMPQ R8, $0x20
  16462. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_17through32
  16463. CMPQ R8, $0x40
  16464. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_33through64
  16465. CMPQ R8, $0x80
  16466. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_65through128
  16467. CMPQ R8, $0x00000100
  16468. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_129through256
  16469. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned
  16470. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_1or2:
  16471. MOVB (SI), R9
  16472. MOVB -1(SI)(R8*1), R10
  16473. MOVB R9, (AX)
  16474. MOVB R10, -1(AX)(R8*1)
  16475. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16476. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_4:
  16477. MOVL (SI), R9
  16478. MOVL R9, (AX)
  16479. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16480. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_3:
  16481. MOVW (SI), R9
  16482. MOVB 2(SI), R10
  16483. MOVW R9, (AX)
  16484. MOVB R10, 2(AX)
  16485. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16486. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_5through7:
  16487. MOVL (SI), R9
  16488. MOVL -4(SI)(R8*1), R10
  16489. MOVL R9, (AX)
  16490. MOVL R10, -4(AX)(R8*1)
  16491. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16492. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_8:
  16493. MOVQ (SI), R9
  16494. MOVQ R9, (AX)
  16495. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16496. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_9through16:
  16497. MOVQ (SI), R9
  16498. MOVQ -8(SI)(R8*1), R10
  16499. MOVQ R9, (AX)
  16500. MOVQ R10, -8(AX)(R8*1)
  16501. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16502. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_17through32:
  16503. MOVOU (SI), X0
  16504. MOVOU -16(SI)(R8*1), X1
  16505. MOVOU X0, (AX)
  16506. MOVOU X1, -16(AX)(R8*1)
  16507. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16508. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_33through64:
  16509. MOVOU (SI), X0
  16510. MOVOU 16(SI), X1
  16511. MOVOU -32(SI)(R8*1), X2
  16512. MOVOU -16(SI)(R8*1), X3
  16513. MOVOU X0, (AX)
  16514. MOVOU X1, 16(AX)
  16515. MOVOU X2, -32(AX)(R8*1)
  16516. MOVOU X3, -16(AX)(R8*1)
  16517. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16518. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_65through128:
  16519. MOVOU (SI), X0
  16520. MOVOU 16(SI), X1
  16521. MOVOU 32(SI), X2
  16522. MOVOU 48(SI), X3
  16523. MOVOU -64(SI)(R8*1), X12
  16524. MOVOU -48(SI)(R8*1), X13
  16525. MOVOU -32(SI)(R8*1), X14
  16526. MOVOU -16(SI)(R8*1), X15
  16527. MOVOU X0, (AX)
  16528. MOVOU X1, 16(AX)
  16529. MOVOU X2, 32(AX)
  16530. MOVOU X3, 48(AX)
  16531. MOVOU X12, -64(AX)(R8*1)
  16532. MOVOU X13, -48(AX)(R8*1)
  16533. MOVOU X14, -32(AX)(R8*1)
  16534. MOVOU X15, -16(AX)(R8*1)
  16535. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16536. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_129through256:
  16537. MOVOU (SI), X0
  16538. MOVOU 16(SI), X1
  16539. MOVOU 32(SI), X2
  16540. MOVOU 48(SI), X3
  16541. MOVOU 64(SI), X4
  16542. MOVOU 80(SI), X5
  16543. MOVOU 96(SI), X6
  16544. MOVOU 112(SI), X7
  16545. MOVOU -128(SI)(R8*1), X8
  16546. MOVOU -112(SI)(R8*1), X9
  16547. MOVOU -96(SI)(R8*1), X10
  16548. MOVOU -80(SI)(R8*1), X11
  16549. MOVOU -64(SI)(R8*1), X12
  16550. MOVOU -48(SI)(R8*1), X13
  16551. MOVOU -32(SI)(R8*1), X14
  16552. MOVOU -16(SI)(R8*1), X15
  16553. MOVOU X0, (AX)
  16554. MOVOU X1, 16(AX)
  16555. MOVOU X2, 32(AX)
  16556. MOVOU X3, 48(AX)
  16557. MOVOU X4, 64(AX)
  16558. MOVOU X5, 80(AX)
  16559. MOVOU X6, 96(AX)
  16560. MOVOU X7, 112(AX)
  16561. MOVOU X8, -128(AX)(R8*1)
  16562. MOVOU X9, -112(AX)(R8*1)
  16563. MOVOU X10, -96(AX)(R8*1)
  16564. MOVOU X11, -80(AX)(R8*1)
  16565. MOVOU X12, -64(AX)(R8*1)
  16566. MOVOU X13, -48(AX)(R8*1)
  16567. MOVOU X14, -32(AX)(R8*1)
  16568. MOVOU X15, -16(AX)(R8*1)
  16569. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx
  16570. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048:
  16571. LEAQ -256(R8), R8
  16572. MOVOU (SI), X0
  16573. MOVOU 16(SI), X1
  16574. MOVOU 32(SI), X2
  16575. MOVOU 48(SI), X3
  16576. MOVOU 64(SI), X4
  16577. MOVOU 80(SI), X5
  16578. MOVOU 96(SI), X6
  16579. MOVOU 112(SI), X7
  16580. MOVOU 128(SI), X8
  16581. MOVOU 144(SI), X9
  16582. MOVOU 160(SI), X10
  16583. MOVOU 176(SI), X11
  16584. MOVOU 192(SI), X12
  16585. MOVOU 208(SI), X13
  16586. MOVOU 224(SI), X14
  16587. MOVOU 240(SI), X15
  16588. MOVOU X0, (AX)
  16589. MOVOU X1, 16(AX)
  16590. MOVOU X2, 32(AX)
  16591. MOVOU X3, 48(AX)
  16592. MOVOU X4, 64(AX)
  16593. MOVOU X5, 80(AX)
  16594. MOVOU X6, 96(AX)
  16595. MOVOU X7, 112(AX)
  16596. MOVOU X8, 128(AX)
  16597. MOVOU X9, 144(AX)
  16598. MOVOU X10, 160(AX)
  16599. MOVOU X11, 176(AX)
  16600. MOVOU X12, 192(AX)
  16601. MOVOU X13, 208(AX)
  16602. MOVOU X14, 224(AX)
  16603. MOVOU X15, 240(AX)
  16604. CMPQ R8, $0x00000100
  16605. LEAQ 256(SI), SI
  16606. LEAQ 256(AX), AX
  16607. JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048
  16608. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_tail
  16609. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned:
  16610. LEAQ (SI)(R8*1), R10
  16611. MOVQ AX, R12
  16612. MOVOU -128(R10), X5
  16613. MOVOU -112(R10), X6
  16614. MOVQ $0x00000080, R9
  16615. ANDQ $0xffffffe0, AX
  16616. ADDQ $0x20, AX
  16617. MOVOU -96(R10), X7
  16618. MOVOU -80(R10), X8
  16619. MOVQ AX, R11
  16620. SUBQ R12, R11
  16621. MOVOU -64(R10), X9
  16622. MOVOU -48(R10), X10
  16623. SUBQ R11, R8
  16624. MOVOU -32(R10), X11
  16625. MOVOU -16(R10), X12
  16626. VMOVDQU (SI), Y4
  16627. ADDQ R11, SI
  16628. SUBQ R9, R8
  16629. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop:
  16630. VMOVDQU (SI), Y0
  16631. VMOVDQU 32(SI), Y1
  16632. VMOVDQU 64(SI), Y2
  16633. VMOVDQU 96(SI), Y3
  16634. ADDQ R9, SI
  16635. VMOVDQA Y0, (AX)
  16636. VMOVDQA Y1, 32(AX)
  16637. VMOVDQA Y2, 64(AX)
  16638. VMOVDQA Y3, 96(AX)
  16639. ADDQ R9, AX
  16640. SUBQ R9, R8
  16641. JA emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop
  16642. ADDQ R9, R8
  16643. ADDQ AX, R8
  16644. VMOVDQU Y4, (R12)
  16645. VZEROUPPER
  16646. MOVOU X5, -128(R8)
  16647. MOVOU X6, -112(R8)
  16648. MOVOU X7, -96(R8)
  16649. MOVOU X8, -80(R8)
  16650. MOVOU X9, -64(R8)
  16651. MOVOU X10, -48(R8)
  16652. MOVOU X11, -32(R8)
  16653. MOVOU X12, -16(R8)
  16654. memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx:
  16655. MOVQ DI, AX
  16656. emit_literal_done_match_emit_encodeSnappyBlockAsm12BAvx:
  16657. match_nolit_loop_encodeSnappyBlockAsm12BAvx:
  16658. MOVL CX, SI
  16659. SUBL BP, SI
  16660. MOVL SI, 16(SP)
  16661. ADDL $0x04, CX
  16662. ADDL $0x04, BP
  16663. MOVQ src_len+32(FP), SI
  16664. SUBL CX, SI
  16665. LEAQ (DX)(CX*1), DI
  16666. LEAQ (DX)(BP*1), BP
  16667. XORL R9, R9
  16668. CMPL SI, $0x08
  16669. JL matchlen_single_match_nolit_encodeSnappyBlockAsm12BAvx
  16670. matchlen_loopback_match_nolit_encodeSnappyBlockAsm12BAvx:
  16671. MOVQ (DI)(R9*1), R8
  16672. XORQ (BP)(R9*1), R8
  16673. TESTQ R8, R8
  16674. JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12BAvx
  16675. BSFQ R8, R8
  16676. SARQ $0x03, R8
  16677. LEAL (R9)(R8*1), R9
  16678. JMP match_nolit_end_encodeSnappyBlockAsm12BAvx
  16679. matchlen_loop_match_nolit_encodeSnappyBlockAsm12BAvx:
  16680. LEAL -8(SI), SI
  16681. LEAL 8(R9), R9
  16682. CMPL SI, $0x08
  16683. JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12BAvx
  16684. matchlen_single_match_nolit_encodeSnappyBlockAsm12BAvx:
  16685. TESTL SI, SI
  16686. JZ match_nolit_end_encodeSnappyBlockAsm12BAvx
  16687. matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12BAvx:
  16688. MOVB (DI)(R9*1), R8
  16689. CMPB (BP)(R9*1), R8
  16690. JNE match_nolit_end_encodeSnappyBlockAsm12BAvx
  16691. LEAL 1(R9), R9
  16692. DECL SI
  16693. JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12BAvx
  16694. match_nolit_end_encodeSnappyBlockAsm12BAvx:
  16695. ADDL R9, CX
  16696. MOVL 16(SP), BP
  16697. ADDL $0x04, R9
  16698. CMPL BP, $0x00010000
  16699. JL two_byte_offset_match_nolit_encodeSnappyBlockAsm12BAvx
  16700. four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12BAvx:
  16701. CMPL R9, $0x40
  16702. JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm12BAvx
  16703. MOVB $0xff, (AX)
  16704. MOVL BP, 1(AX)
  16705. LEAL -64(R9), R9
  16706. ADDQ $0x05, AX
  16707. CMPL R9, $0x04
  16708. JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm12BAvx
  16709. JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12BAvx
  16710. four_bytes_remain_match_nolit_encodeSnappyBlockAsm12BAvx:
  16711. TESTL R9, R9
  16712. JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm12BAvx
  16713. MOVB $0x03, BL
  16714. LEAL -4(BX)(R9*4), R9
  16715. MOVB R9, (AX)
  16716. MOVL BP, 1(AX)
  16717. ADDQ $0x05, AX
  16718. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12BAvx
  16719. two_byte_offset_match_nolit_encodeSnappyBlockAsm12BAvx:
  16720. CMPL R9, $0x40
  16721. JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12BAvx
  16722. MOVB $0xee, (AX)
  16723. MOVW BP, 1(AX)
  16724. LEAL -60(R9), R9
  16725. ADDQ $0x03, AX
  16726. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12BAvx
  16727. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12BAvx:
  16728. CMPL R9, $0x0c
  16729. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12BAvx
  16730. CMPL BP, $0x00000800
  16731. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12BAvx
  16732. MOVB $0x01, BL
  16733. LEAL -16(BX)(R9*4), R9
  16734. MOVB BP, 1(AX)
  16735. SHRL $0x08, BP
  16736. SHLL $0x05, BP
  16737. ORL BP, R9
  16738. MOVB R9, (AX)
  16739. ADDQ $0x02, AX
  16740. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12BAvx
  16741. emit_copy_three_match_nolit_encodeSnappyBlockAsm12BAvx:
  16742. MOVB $0x02, BL
  16743. LEAL -4(BX)(R9*4), R9
  16744. MOVB R9, (AX)
  16745. MOVW BP, 1(AX)
  16746. ADDQ $0x03, AX
  16747. match_nolit_emitcopy_end_encodeSnappyBlockAsm12BAvx:
  16748. MOVL CX, 12(SP)
  16749. CMPL CX, 8(SP)
  16750. JGE emit_remainder_encodeSnappyBlockAsm12BAvx
  16751. CMPQ AX, (SP)
  16752. JL match_nolit_dst_ok_encodeSnappyBlockAsm12BAvx
  16753. MOVQ $0x00000000, ret+48(FP)
  16754. RET
  16755. match_nolit_dst_ok_encodeSnappyBlockAsm12BAvx:
  16756. MOVQ -2(DX)(CX*1), SI
  16757. MOVQ $0x000000cf1bbcdcbb, BP
  16758. MOVQ SI, DI
  16759. SHRQ $0x10, SI
  16760. MOVQ SI, R8
  16761. SHLQ $0x18, DI
  16762. IMULQ BP, DI
  16763. SHRQ $0x34, DI
  16764. SHLQ $0x18, R8
  16765. IMULQ BP, R8
  16766. SHRQ $0x34, R8
  16767. LEAL -2(CX), R9
  16768. MOVL 24(SP)(R8*4), BP
  16769. MOVL R9, 24(SP)(DI*4)
  16770. MOVL CX, 24(SP)(R8*4)
  16771. CMPL (DX)(BP*1), SI
  16772. JEQ match_nolit_loop_encodeSnappyBlockAsm12BAvx
  16773. INCL CX
  16774. JMP search_loop_encodeSnappyBlockAsm12BAvx
  16775. emit_remainder_encodeSnappyBlockAsm12BAvx:
  16776. MOVQ src_len+32(FP), CX
  16777. SUBL 12(SP), CX
  16778. LEAQ 4(AX)(CX*1), CX
  16779. CMPQ CX, (SP)
  16780. JL emit_remainder_ok_encodeSnappyBlockAsm12BAvx
  16781. MOVQ $0x00000000, ret+48(FP)
  16782. RET
  16783. emit_remainder_ok_encodeSnappyBlockAsm12BAvx:
  16784. MOVQ src_len+32(FP), CX
  16785. MOVL 12(SP), BX
  16786. CMPL BX, CX
  16787. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12BAvx
  16788. MOVL CX, BP
  16789. MOVL CX, 12(SP)
  16790. LEAQ (DX)(BX*1), CX
  16791. SUBL BX, BP
  16792. MOVL BP, DX
  16793. SUBL $0x01, DX
  16794. JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm12BAvx
  16795. CMPL DX, $0x3c
  16796. JLT one_byte_emit_remainder_encodeSnappyBlockAsm12BAvx
  16797. CMPL DX, $0x00000100
  16798. JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx
  16799. CMPL DX, $0x00010000
  16800. JLT three_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx
  16801. CMPL DX, $0x01000000
  16802. JLT four_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx
  16803. MOVB $0xfc, (AX)
  16804. MOVL DX, 1(AX)
  16805. ADDQ $0x05, AX
  16806. JMP memmove_emit_remainder_encodeSnappyBlockAsm12BAvx
  16807. four_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx:
  16808. MOVL DX, BX
  16809. SHRL $0x10, BX
  16810. MOVB $0xf8, (AX)
  16811. MOVW DX, 1(AX)
  16812. MOVB BL, 3(AX)
  16813. ADDQ $0x04, AX
  16814. JMP memmove_emit_remainder_encodeSnappyBlockAsm12BAvx
  16815. three_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx:
  16816. MOVB $0xf4, (AX)
  16817. MOVW DX, 1(AX)
  16818. ADDQ $0x03, AX
  16819. JMP memmove_emit_remainder_encodeSnappyBlockAsm12BAvx
  16820. two_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx:
  16821. MOVB $0xf0, (AX)
  16822. MOVB DL, 1(AX)
  16823. ADDQ $0x02, AX
  16824. JMP memmove_emit_remainder_encodeSnappyBlockAsm12BAvx
  16825. one_byte_emit_remainder_encodeSnappyBlockAsm12BAvx:
  16826. SHLB $0x02, DL
  16827. MOVB DL, (AX)
  16828. ADDQ $0x01, AX
  16829. memmove_emit_remainder_encodeSnappyBlockAsm12BAvx:
  16830. LEAQ (AX)(BP*1), DX
  16831. MOVL BP, BX
  16832. NOP
  16833. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_tail:
  16834. TESTQ BX, BX
  16835. JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16836. CMPQ BX, $0x02
  16837. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_1or2
  16838. CMPQ BX, $0x04
  16839. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_3
  16840. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_4
  16841. CMPQ BX, $0x08
  16842. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_5through7
  16843. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_8
  16844. CMPQ BX, $0x10
  16845. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_9through16
  16846. CMPQ BX, $0x20
  16847. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_17through32
  16848. CMPQ BX, $0x40
  16849. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_33through64
  16850. CMPQ BX, $0x80
  16851. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_65through128
  16852. CMPQ BX, $0x00000100
  16853. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_129through256
  16854. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned
  16855. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_1or2:
  16856. MOVB (CX), BP
  16857. MOVB -1(CX)(BX*1), SI
  16858. MOVB BP, (AX)
  16859. MOVB SI, -1(AX)(BX*1)
  16860. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16861. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_4:
  16862. MOVL (CX), BP
  16863. MOVL BP, (AX)
  16864. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16865. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_3:
  16866. MOVW (CX), BP
  16867. MOVB 2(CX), SI
  16868. MOVW BP, (AX)
  16869. MOVB SI, 2(AX)
  16870. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16871. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_5through7:
  16872. MOVL (CX), BP
  16873. MOVL -4(CX)(BX*1), SI
  16874. MOVL BP, (AX)
  16875. MOVL SI, -4(AX)(BX*1)
  16876. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16877. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_8:
  16878. MOVQ (CX), BP
  16879. MOVQ BP, (AX)
  16880. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16881. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_9through16:
  16882. MOVQ (CX), BP
  16883. MOVQ -8(CX)(BX*1), SI
  16884. MOVQ BP, (AX)
  16885. MOVQ SI, -8(AX)(BX*1)
  16886. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16887. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_17through32:
  16888. MOVOU (CX), X0
  16889. MOVOU -16(CX)(BX*1), X1
  16890. MOVOU X0, (AX)
  16891. MOVOU X1, -16(AX)(BX*1)
  16892. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16893. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_33through64:
  16894. MOVOU (CX), X0
  16895. MOVOU 16(CX), X1
  16896. MOVOU -32(CX)(BX*1), X2
  16897. MOVOU -16(CX)(BX*1), X3
  16898. MOVOU X0, (AX)
  16899. MOVOU X1, 16(AX)
  16900. MOVOU X2, -32(AX)(BX*1)
  16901. MOVOU X3, -16(AX)(BX*1)
  16902. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16903. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_65through128:
  16904. MOVOU (CX), X0
  16905. MOVOU 16(CX), X1
  16906. MOVOU 32(CX), X2
  16907. MOVOU 48(CX), X3
  16908. MOVOU -64(CX)(BX*1), X12
  16909. MOVOU -48(CX)(BX*1), X13
  16910. MOVOU -32(CX)(BX*1), X14
  16911. MOVOU -16(CX)(BX*1), X15
  16912. MOVOU X0, (AX)
  16913. MOVOU X1, 16(AX)
  16914. MOVOU X2, 32(AX)
  16915. MOVOU X3, 48(AX)
  16916. MOVOU X12, -64(AX)(BX*1)
  16917. MOVOU X13, -48(AX)(BX*1)
  16918. MOVOU X14, -32(AX)(BX*1)
  16919. MOVOU X15, -16(AX)(BX*1)
  16920. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16921. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_129through256:
  16922. MOVOU (CX), X0
  16923. MOVOU 16(CX), X1
  16924. MOVOU 32(CX), X2
  16925. MOVOU 48(CX), X3
  16926. MOVOU 64(CX), X4
  16927. MOVOU 80(CX), X5
  16928. MOVOU 96(CX), X6
  16929. MOVOU 112(CX), X7
  16930. MOVOU -128(CX)(BX*1), X8
  16931. MOVOU -112(CX)(BX*1), X9
  16932. MOVOU -96(CX)(BX*1), X10
  16933. MOVOU -80(CX)(BX*1), X11
  16934. MOVOU -64(CX)(BX*1), X12
  16935. MOVOU -48(CX)(BX*1), X13
  16936. MOVOU -32(CX)(BX*1), X14
  16937. MOVOU -16(CX)(BX*1), X15
  16938. MOVOU X0, (AX)
  16939. MOVOU X1, 16(AX)
  16940. MOVOU X2, 32(AX)
  16941. MOVOU X3, 48(AX)
  16942. MOVOU X4, 64(AX)
  16943. MOVOU X5, 80(AX)
  16944. MOVOU X6, 96(AX)
  16945. MOVOU X7, 112(AX)
  16946. MOVOU X8, -128(AX)(BX*1)
  16947. MOVOU X9, -112(AX)(BX*1)
  16948. MOVOU X10, -96(AX)(BX*1)
  16949. MOVOU X11, -80(AX)(BX*1)
  16950. MOVOU X12, -64(AX)(BX*1)
  16951. MOVOU X13, -48(AX)(BX*1)
  16952. MOVOU X14, -32(AX)(BX*1)
  16953. MOVOU X15, -16(AX)(BX*1)
  16954. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx
  16955. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048:
  16956. LEAQ -256(BX), BX
  16957. MOVOU (CX), X0
  16958. MOVOU 16(CX), X1
  16959. MOVOU 32(CX), X2
  16960. MOVOU 48(CX), X3
  16961. MOVOU 64(CX), X4
  16962. MOVOU 80(CX), X5
  16963. MOVOU 96(CX), X6
  16964. MOVOU 112(CX), X7
  16965. MOVOU 128(CX), X8
  16966. MOVOU 144(CX), X9
  16967. MOVOU 160(CX), X10
  16968. MOVOU 176(CX), X11
  16969. MOVOU 192(CX), X12
  16970. MOVOU 208(CX), X13
  16971. MOVOU 224(CX), X14
  16972. MOVOU 240(CX), X15
  16973. MOVOU X0, (AX)
  16974. MOVOU X1, 16(AX)
  16975. MOVOU X2, 32(AX)
  16976. MOVOU X3, 48(AX)
  16977. MOVOU X4, 64(AX)
  16978. MOVOU X5, 80(AX)
  16979. MOVOU X6, 96(AX)
  16980. MOVOU X7, 112(AX)
  16981. MOVOU X8, 128(AX)
  16982. MOVOU X9, 144(AX)
  16983. MOVOU X10, 160(AX)
  16984. MOVOU X11, 176(AX)
  16985. MOVOU X12, 192(AX)
  16986. MOVOU X13, 208(AX)
  16987. MOVOU X14, 224(AX)
  16988. MOVOU X15, 240(AX)
  16989. CMPQ BX, $0x00000100
  16990. LEAQ 256(CX), CX
  16991. LEAQ 256(AX), AX
  16992. JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048
  16993. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_tail
  16994. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned:
  16995. LEAQ (CX)(BX*1), SI
  16996. MOVQ AX, R8
  16997. MOVOU -128(SI), X5
  16998. MOVOU -112(SI), X6
  16999. MOVQ $0x00000080, BP
  17000. ANDQ $0xffffffe0, AX
  17001. ADDQ $0x20, AX
  17002. MOVOU -96(SI), X7
  17003. MOVOU -80(SI), X8
  17004. MOVQ AX, DI
  17005. SUBQ R8, DI
  17006. MOVOU -64(SI), X9
  17007. MOVOU -48(SI), X10
  17008. SUBQ DI, BX
  17009. MOVOU -32(SI), X11
  17010. MOVOU -16(SI), X12
  17011. VMOVDQU (CX), Y4
  17012. ADDQ DI, CX
  17013. SUBQ BP, BX
  17014. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop:
  17015. VMOVDQU (CX), Y0
  17016. VMOVDQU 32(CX), Y1
  17017. VMOVDQU 64(CX), Y2
  17018. VMOVDQU 96(CX), Y3
  17019. ADDQ BP, CX
  17020. VMOVDQA Y0, (AX)
  17021. VMOVDQA Y1, 32(AX)
  17022. VMOVDQA Y2, 64(AX)
  17023. VMOVDQA Y3, 96(AX)
  17024. ADDQ BP, AX
  17025. SUBQ BP, BX
  17026. JA emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop
  17027. ADDQ BP, BX
  17028. ADDQ AX, BX
  17029. VMOVDQU Y4, (R8)
  17030. VZEROUPPER
  17031. MOVOU X5, -128(BX)
  17032. MOVOU X6, -112(BX)
  17033. MOVOU X7, -96(BX)
  17034. MOVOU X8, -80(BX)
  17035. MOVOU X9, -64(BX)
  17036. MOVOU X10, -48(BX)
  17037. MOVOU X11, -32(BX)
  17038. MOVOU X12, -16(BX)
  17039. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx:
  17040. MOVQ DX, AX
  17041. emit_literal_done_emit_remainder_encodeSnappyBlockAsm12BAvx:
  17042. MOVQ dst_base+0(FP), CX
  17043. SUBQ CX, AX
  17044. MOVQ AX, ret+48(FP)
  17045. RET
  17046. // func encodeSnappyBlockAsm10BAvx(dst []byte, src []byte) int
  17047. // Requires: AVX, SSE2
  17048. TEXT ·encodeSnappyBlockAsm10BAvx(SB), $4120-56
  17049. MOVQ dst_base+0(FP), AX
  17050. MOVQ $0x00000020, CX
  17051. LEAQ 24(SP), DX
  17052. PXOR X0, X0
  17053. zero_loop_encodeSnappyBlockAsm10BAvx:
  17054. MOVOU X0, (DX)
  17055. MOVOU X0, 16(DX)
  17056. MOVOU X0, 32(DX)
  17057. MOVOU X0, 48(DX)
  17058. MOVOU X0, 64(DX)
  17059. MOVOU X0, 80(DX)
  17060. MOVOU X0, 96(DX)
  17061. MOVOU X0, 112(DX)
  17062. ADDQ $0x80, DX
  17063. DECQ CX
  17064. JNZ zero_loop_encodeSnappyBlockAsm10BAvx
  17065. MOVL $0x00000000, 12(SP)
  17066. MOVQ src_len+32(FP), CX
  17067. LEAQ -5(CX), DX
  17068. LEAQ -8(CX), BP
  17069. MOVL BP, 8(SP)
  17070. SHRQ $0x05, CX
  17071. SUBL CX, DX
  17072. LEAQ (AX)(DX*1), DX
  17073. MOVQ DX, (SP)
  17074. MOVL $0x00000001, CX
  17075. MOVL CX, 16(SP)
  17076. MOVQ src_base+24(FP), DX
  17077. search_loop_encodeSnappyBlockAsm10BAvx:
  17078. MOVQ (DX)(CX*1), SI
  17079. MOVL CX, BP
  17080. SUBL 12(SP), BP
  17081. SHRL $0x05, BP
  17082. LEAL 4(CX)(BP*1), BP
  17083. MOVL 8(SP), DI
  17084. CMPL BP, DI
  17085. JGT emit_remainder_encodeSnappyBlockAsm10BAvx
  17086. MOVL BP, 20(SP)
  17087. MOVQ $0x9e3779b1, R8
  17088. MOVQ SI, R9
  17089. MOVQ SI, R10
  17090. SHRQ $0x08, R10
  17091. SHLQ $0x20, R9
  17092. IMULQ R8, R9
  17093. SHRQ $0x36, R9
  17094. SHLQ $0x20, R10
  17095. IMULQ R8, R10
  17096. SHRQ $0x36, R10
  17097. MOVL 24(SP)(R9*4), BP
  17098. MOVL 24(SP)(R10*4), DI
  17099. MOVL CX, 24(SP)(R9*4)
  17100. LEAL 1(CX), R9
  17101. MOVL R9, 24(SP)(R10*4)
  17102. MOVQ SI, R9
  17103. SHRQ $0x10, R9
  17104. SHLQ $0x20, R9
  17105. IMULQ R8, R9
  17106. SHRQ $0x36, R9
  17107. MOVL CX, R8
  17108. SUBL 16(SP), R8
  17109. MOVL 1(DX)(R8*1), R10
  17110. MOVQ SI, R8
  17111. SHRQ $0x08, R8
  17112. CMPL R8, R10
  17113. JNE no_repeat_found_encodeSnappyBlockAsm10BAvx
  17114. LEAL 1(CX), SI
  17115. MOVL 12(SP), BP
  17116. MOVL SI, DI
  17117. SUBL 16(SP), DI
  17118. JZ repeat_extend_back_end_encodeSnappyBlockAsm10BAvx
  17119. repeat_extend_back_loop_encodeSnappyBlockAsm10BAvx:
  17120. CMPL SI, BP
  17121. JLE repeat_extend_back_end_encodeSnappyBlockAsm10BAvx
  17122. MOVB -1(DX)(DI*1), BL
  17123. MOVB -1(DX)(SI*1), R8
  17124. CMPB BL, R8
  17125. JNE repeat_extend_back_end_encodeSnappyBlockAsm10BAvx
  17126. LEAL -1(SI), SI
  17127. DECL DI
  17128. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10BAvx
  17129. repeat_extend_back_end_encodeSnappyBlockAsm10BAvx:
  17130. MOVL 12(SP), BP
  17131. CMPL BP, SI
  17132. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10BAvx
  17133. MOVL SI, DI
  17134. MOVL SI, 12(SP)
  17135. LEAQ (DX)(BP*1), R8
  17136. SUBL BP, DI
  17137. MOVL DI, BP
  17138. SUBL $0x01, BP
  17139. JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm10BAvx
  17140. CMPL BP, $0x3c
  17141. JLT one_byte_repeat_emit_encodeSnappyBlockAsm10BAvx
  17142. CMPL BP, $0x00000100
  17143. JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx
  17144. CMPL BP, $0x00010000
  17145. JLT three_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx
  17146. CMPL BP, $0x01000000
  17147. JLT four_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx
  17148. MOVB $0xfc, (AX)
  17149. MOVL BP, 1(AX)
  17150. ADDQ $0x05, AX
  17151. JMP memmove_repeat_emit_encodeSnappyBlockAsm10BAvx
  17152. four_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx:
  17153. MOVL BP, R9
  17154. SHRL $0x10, R9
  17155. MOVB $0xf8, (AX)
  17156. MOVW BP, 1(AX)
  17157. MOVB R9, 3(AX)
  17158. ADDQ $0x04, AX
  17159. JMP memmove_repeat_emit_encodeSnappyBlockAsm10BAvx
  17160. three_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx:
  17161. MOVB $0xf4, (AX)
  17162. MOVW BP, 1(AX)
  17163. ADDQ $0x03, AX
  17164. JMP memmove_repeat_emit_encodeSnappyBlockAsm10BAvx
  17165. two_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx:
  17166. MOVB $0xf0, (AX)
  17167. MOVB BP, 1(AX)
  17168. ADDQ $0x02, AX
  17169. JMP memmove_repeat_emit_encodeSnappyBlockAsm10BAvx
  17170. one_byte_repeat_emit_encodeSnappyBlockAsm10BAvx:
  17171. SHLB $0x02, BP
  17172. MOVB BP, (AX)
  17173. ADDQ $0x01, AX
  17174. memmove_repeat_emit_encodeSnappyBlockAsm10BAvx:
  17175. LEAQ (AX)(DI*1), BP
  17176. NOP
  17177. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_tail:
  17178. TESTQ DI, DI
  17179. JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17180. CMPQ DI, $0x02
  17181. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_1or2
  17182. CMPQ DI, $0x04
  17183. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_3
  17184. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_4
  17185. CMPQ DI, $0x08
  17186. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_5through7
  17187. JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_8
  17188. CMPQ DI, $0x10
  17189. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_9through16
  17190. CMPQ DI, $0x20
  17191. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_17through32
  17192. CMPQ DI, $0x40
  17193. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_33through64
  17194. CMPQ DI, $0x80
  17195. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_65through128
  17196. CMPQ DI, $0x00000100
  17197. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_129through256
  17198. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned
  17199. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_1or2:
  17200. MOVB (R8), R9
  17201. MOVB -1(R8)(DI*1), R10
  17202. MOVB R9, (AX)
  17203. MOVB R10, -1(AX)(DI*1)
  17204. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17205. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_4:
  17206. MOVL (R8), R9
  17207. MOVL R9, (AX)
  17208. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17209. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_3:
  17210. MOVW (R8), R9
  17211. MOVB 2(R8), R10
  17212. MOVW R9, (AX)
  17213. MOVB R10, 2(AX)
  17214. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17215. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_5through7:
  17216. MOVL (R8), R9
  17217. MOVL -4(R8)(DI*1), R10
  17218. MOVL R9, (AX)
  17219. MOVL R10, -4(AX)(DI*1)
  17220. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17221. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_8:
  17222. MOVQ (R8), R9
  17223. MOVQ R9, (AX)
  17224. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17225. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_9through16:
  17226. MOVQ (R8), R9
  17227. MOVQ -8(R8)(DI*1), R10
  17228. MOVQ R9, (AX)
  17229. MOVQ R10, -8(AX)(DI*1)
  17230. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17231. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_17through32:
  17232. MOVOU (R8), X0
  17233. MOVOU -16(R8)(DI*1), X1
  17234. MOVOU X0, (AX)
  17235. MOVOU X1, -16(AX)(DI*1)
  17236. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17237. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_33through64:
  17238. MOVOU (R8), X0
  17239. MOVOU 16(R8), X1
  17240. MOVOU -32(R8)(DI*1), X2
  17241. MOVOU -16(R8)(DI*1), X3
  17242. MOVOU X0, (AX)
  17243. MOVOU X1, 16(AX)
  17244. MOVOU X2, -32(AX)(DI*1)
  17245. MOVOU X3, -16(AX)(DI*1)
  17246. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17247. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_65through128:
  17248. MOVOU (R8), X0
  17249. MOVOU 16(R8), X1
  17250. MOVOU 32(R8), X2
  17251. MOVOU 48(R8), X3
  17252. MOVOU -64(R8)(DI*1), X12
  17253. MOVOU -48(R8)(DI*1), X13
  17254. MOVOU -32(R8)(DI*1), X14
  17255. MOVOU -16(R8)(DI*1), X15
  17256. MOVOU X0, (AX)
  17257. MOVOU X1, 16(AX)
  17258. MOVOU X2, 32(AX)
  17259. MOVOU X3, 48(AX)
  17260. MOVOU X12, -64(AX)(DI*1)
  17261. MOVOU X13, -48(AX)(DI*1)
  17262. MOVOU X14, -32(AX)(DI*1)
  17263. MOVOU X15, -16(AX)(DI*1)
  17264. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17265. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_129through256:
  17266. MOVOU (R8), X0
  17267. MOVOU 16(R8), X1
  17268. MOVOU 32(R8), X2
  17269. MOVOU 48(R8), X3
  17270. MOVOU 64(R8), X4
  17271. MOVOU 80(R8), X5
  17272. MOVOU 96(R8), X6
  17273. MOVOU 112(R8), X7
  17274. MOVOU -128(R8)(DI*1), X8
  17275. MOVOU -112(R8)(DI*1), X9
  17276. MOVOU -96(R8)(DI*1), X10
  17277. MOVOU -80(R8)(DI*1), X11
  17278. MOVOU -64(R8)(DI*1), X12
  17279. MOVOU -48(R8)(DI*1), X13
  17280. MOVOU -32(R8)(DI*1), X14
  17281. MOVOU -16(R8)(DI*1), X15
  17282. MOVOU X0, (AX)
  17283. MOVOU X1, 16(AX)
  17284. MOVOU X2, 32(AX)
  17285. MOVOU X3, 48(AX)
  17286. MOVOU X4, 64(AX)
  17287. MOVOU X5, 80(AX)
  17288. MOVOU X6, 96(AX)
  17289. MOVOU X7, 112(AX)
  17290. MOVOU X8, -128(AX)(DI*1)
  17291. MOVOU X9, -112(AX)(DI*1)
  17292. MOVOU X10, -96(AX)(DI*1)
  17293. MOVOU X11, -80(AX)(DI*1)
  17294. MOVOU X12, -64(AX)(DI*1)
  17295. MOVOU X13, -48(AX)(DI*1)
  17296. MOVOU X14, -32(AX)(DI*1)
  17297. MOVOU X15, -16(AX)(DI*1)
  17298. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx
  17299. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048:
  17300. LEAQ -256(DI), DI
  17301. MOVOU (R8), X0
  17302. MOVOU 16(R8), X1
  17303. MOVOU 32(R8), X2
  17304. MOVOU 48(R8), X3
  17305. MOVOU 64(R8), X4
  17306. MOVOU 80(R8), X5
  17307. MOVOU 96(R8), X6
  17308. MOVOU 112(R8), X7
  17309. MOVOU 128(R8), X8
  17310. MOVOU 144(R8), X9
  17311. MOVOU 160(R8), X10
  17312. MOVOU 176(R8), X11
  17313. MOVOU 192(R8), X12
  17314. MOVOU 208(R8), X13
  17315. MOVOU 224(R8), X14
  17316. MOVOU 240(R8), X15
  17317. MOVOU X0, (AX)
  17318. MOVOU X1, 16(AX)
  17319. MOVOU X2, 32(AX)
  17320. MOVOU X3, 48(AX)
  17321. MOVOU X4, 64(AX)
  17322. MOVOU X5, 80(AX)
  17323. MOVOU X6, 96(AX)
  17324. MOVOU X7, 112(AX)
  17325. MOVOU X8, 128(AX)
  17326. MOVOU X9, 144(AX)
  17327. MOVOU X10, 160(AX)
  17328. MOVOU X11, 176(AX)
  17329. MOVOU X12, 192(AX)
  17330. MOVOU X13, 208(AX)
  17331. MOVOU X14, 224(AX)
  17332. MOVOU X15, 240(AX)
  17333. CMPQ DI, $0x00000100
  17334. LEAQ 256(R8), R8
  17335. LEAQ 256(AX), AX
  17336. JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048
  17337. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_tail
  17338. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned:
  17339. LEAQ (R8)(DI*1), R10
  17340. MOVQ AX, R12
  17341. MOVOU -128(R10), X5
  17342. MOVOU -112(R10), X6
  17343. MOVQ $0x00000080, R9
  17344. ANDQ $0xffffffe0, AX
  17345. ADDQ $0x20, AX
  17346. MOVOU -96(R10), X7
  17347. MOVOU -80(R10), X8
  17348. MOVQ AX, R11
  17349. SUBQ R12, R11
  17350. MOVOU -64(R10), X9
  17351. MOVOU -48(R10), X10
  17352. SUBQ R11, DI
  17353. MOVOU -32(R10), X11
  17354. MOVOU -16(R10), X12
  17355. VMOVDQU (R8), Y4
  17356. ADDQ R11, R8
  17357. SUBQ R9, DI
  17358. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop:
  17359. VMOVDQU (R8), Y0
  17360. VMOVDQU 32(R8), Y1
  17361. VMOVDQU 64(R8), Y2
  17362. VMOVDQU 96(R8), Y3
  17363. ADDQ R9, R8
  17364. VMOVDQA Y0, (AX)
  17365. VMOVDQA Y1, 32(AX)
  17366. VMOVDQA Y2, 64(AX)
  17367. VMOVDQA Y3, 96(AX)
  17368. ADDQ R9, AX
  17369. SUBQ R9, DI
  17370. JA emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop
  17371. ADDQ R9, DI
  17372. ADDQ AX, DI
  17373. VMOVDQU Y4, (R12)
  17374. VZEROUPPER
  17375. MOVOU X5, -128(DI)
  17376. MOVOU X6, -112(DI)
  17377. MOVOU X7, -96(DI)
  17378. MOVOU X8, -80(DI)
  17379. MOVOU X9, -64(DI)
  17380. MOVOU X10, -48(DI)
  17381. MOVOU X11, -32(DI)
  17382. MOVOU X12, -16(DI)
  17383. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx:
  17384. MOVQ BP, AX
  17385. emit_literal_done_repeat_emit_encodeSnappyBlockAsm10BAvx:
  17386. ADDL $0x05, CX
  17387. MOVL CX, BP
  17388. SUBL 16(SP), BP
  17389. MOVQ src_len+32(FP), DI
  17390. SUBL CX, DI
  17391. LEAQ (DX)(CX*1), R8
  17392. LEAQ (DX)(BP*1), BP
  17393. XORL R10, R10
  17394. CMPL DI, $0x08
  17395. JL matchlen_single_repeat_extend
  17396. matchlen_loopback_repeat_extend:
  17397. MOVQ (R8)(R10*1), R9
  17398. XORQ (BP)(R10*1), R9
  17399. TESTQ R9, R9
  17400. JZ matchlen_loop_repeat_extend
  17401. BSFQ R9, R9
  17402. SARQ $0x03, R9
  17403. LEAL (R10)(R9*1), R10
  17404. JMP repeat_extend_forward_end_encodeSnappyBlockAsm10BAvx
  17405. matchlen_loop_repeat_extend:
  17406. LEAL -8(DI), DI
  17407. LEAL 8(R10), R10
  17408. CMPL DI, $0x08
  17409. JGE matchlen_loopback_repeat_extend
  17410. matchlen_single_repeat_extend:
  17411. TESTL DI, DI
  17412. JZ repeat_extend_forward_end_encodeSnappyBlockAsm10BAvx
  17413. matchlen_single_loopback_repeat_extend:
  17414. MOVB (R8)(R10*1), R9
  17415. CMPB (BP)(R10*1), R9
  17416. JNE repeat_extend_forward_end_encodeSnappyBlockAsm10BAvx
  17417. LEAL 1(R10), R10
  17418. DECL DI
  17419. JNZ matchlen_single_loopback_repeat_extend
  17420. repeat_extend_forward_end_encodeSnappyBlockAsm10BAvx:
  17421. ADDL R10, CX
  17422. MOVL CX, BP
  17423. SUBL SI, BP
  17424. MOVL 16(SP), SI
  17425. CMPL SI, $0x00010000
  17426. JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10BAvx
  17427. four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10BAvx:
  17428. CMPL BP, $0x40
  17429. JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10BAvx
  17430. MOVB $0xff, (AX)
  17431. MOVL SI, 1(AX)
  17432. LEAL -64(BP), BP
  17433. ADDQ $0x05, AX
  17434. CMPL BP, $0x04
  17435. JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10BAvx
  17436. JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10BAvx
  17437. four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10BAvx:
  17438. TESTL BP, BP
  17439. JZ repeat_end_emit_encodeSnappyBlockAsm10BAvx
  17440. MOVB $0x03, BL
  17441. LEAL -4(BX)(BP*4), BP
  17442. MOVB BP, (AX)
  17443. MOVL SI, 1(AX)
  17444. ADDQ $0x05, AX
  17445. JMP repeat_end_emit_encodeSnappyBlockAsm10BAvx
  17446. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10BAvx:
  17447. CMPL BP, $0x40
  17448. JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10BAvx
  17449. MOVB $0xee, (AX)
  17450. MOVW SI, 1(AX)
  17451. LEAL -60(BP), BP
  17452. ADDQ $0x03, AX
  17453. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10BAvx
  17454. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10BAvx:
  17455. CMPL BP, $0x0c
  17456. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10BAvx
  17457. CMPL SI, $0x00000800
  17458. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10BAvx
  17459. MOVB $0x01, BL
  17460. LEAL -16(BX)(BP*4), BP
  17461. MOVB SI, 1(AX)
  17462. SHRL $0x08, SI
  17463. SHLL $0x05, SI
  17464. ORL SI, BP
  17465. MOVB BP, (AX)
  17466. ADDQ $0x02, AX
  17467. JMP repeat_end_emit_encodeSnappyBlockAsm10BAvx
  17468. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10BAvx:
  17469. MOVB $0x02, BL
  17470. LEAL -4(BX)(BP*4), BP
  17471. MOVB BP, (AX)
  17472. MOVW SI, 1(AX)
  17473. ADDQ $0x03, AX
  17474. repeat_end_emit_encodeSnappyBlockAsm10BAvx:
  17475. MOVL CX, 12(SP)
  17476. CMPL CX, 8(SP)
  17477. JGE emit_remainder_encodeSnappyBlockAsm10BAvx
  17478. JMP search_loop_encodeSnappyBlockAsm10BAvx
  17479. no_repeat_found_encodeSnappyBlockAsm10BAvx:
  17480. CMPL (DX)(BP*1), SI
  17481. JEQ candidate_match_encodeSnappyBlockAsm10BAvx
  17482. SHRQ $0x08, SI
  17483. MOVL 24(SP)(R9*4), BP
  17484. LEAL 2(CX), R8
  17485. CMPL (DX)(DI*1), SI
  17486. JEQ candidate2_match_encodeSnappyBlockAsm10BAvx
  17487. MOVL R8, 24(SP)(R9*4)
  17488. SHRQ $0x08, SI
  17489. CMPL (DX)(BP*1), SI
  17490. JEQ candidate3_match_encodeSnappyBlockAsm10BAvx
  17491. MOVL 20(SP), CX
  17492. JMP search_loop_encodeSnappyBlockAsm10BAvx
  17493. candidate3_match_encodeSnappyBlockAsm10BAvx:
  17494. ADDL $0x02, CX
  17495. JMP candidate_match_encodeSnappyBlockAsm10BAvx
  17496. candidate2_match_encodeSnappyBlockAsm10BAvx:
  17497. MOVL R8, 24(SP)(R9*4)
  17498. INCL CX
  17499. MOVL DI, BP
  17500. candidate_match_encodeSnappyBlockAsm10BAvx:
  17501. MOVL 12(SP), SI
  17502. TESTL BP, BP
  17503. JZ match_extend_back_end_encodeSnappyBlockAsm10BAvx
  17504. match_extend_back_loop_encodeSnappyBlockAsm10BAvx:
  17505. CMPL CX, SI
  17506. JLE match_extend_back_end_encodeSnappyBlockAsm10BAvx
  17507. MOVB -1(DX)(BP*1), BL
  17508. MOVB -1(DX)(CX*1), DI
  17509. CMPB BL, DI
  17510. JNE match_extend_back_end_encodeSnappyBlockAsm10BAvx
  17511. LEAL -1(CX), CX
  17512. DECL BP
  17513. JZ match_extend_back_end_encodeSnappyBlockAsm10BAvx
  17514. JMP match_extend_back_loop_encodeSnappyBlockAsm10BAvx
  17515. match_extend_back_end_encodeSnappyBlockAsm10BAvx:
  17516. MOVL CX, SI
  17517. SUBL 12(SP), SI
  17518. LEAQ 4(AX)(SI*1), SI
  17519. CMPQ SI, (SP)
  17520. JL match_dst_size_check_encodeSnappyBlockAsm10BAvx
  17521. MOVQ $0x00000000, ret+48(FP)
  17522. RET
  17523. match_dst_size_check_encodeSnappyBlockAsm10BAvx:
  17524. MOVL CX, SI
  17525. MOVL 12(SP), DI
  17526. CMPL DI, SI
  17527. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10BAvx
  17528. MOVL SI, R8
  17529. MOVL SI, 12(SP)
  17530. LEAQ (DX)(DI*1), SI
  17531. SUBL DI, R8
  17532. MOVL R8, DI
  17533. SUBL $0x01, DI
  17534. JC emit_literal_done_match_emit_encodeSnappyBlockAsm10BAvx
  17535. CMPL DI, $0x3c
  17536. JLT one_byte_match_emit_encodeSnappyBlockAsm10BAvx
  17537. CMPL DI, $0x00000100
  17538. JLT two_bytes_match_emit_encodeSnappyBlockAsm10BAvx
  17539. CMPL DI, $0x00010000
  17540. JLT three_bytes_match_emit_encodeSnappyBlockAsm10BAvx
  17541. CMPL DI, $0x01000000
  17542. JLT four_bytes_match_emit_encodeSnappyBlockAsm10BAvx
  17543. MOVB $0xfc, (AX)
  17544. MOVL DI, 1(AX)
  17545. ADDQ $0x05, AX
  17546. JMP memmove_match_emit_encodeSnappyBlockAsm10BAvx
  17547. four_bytes_match_emit_encodeSnappyBlockAsm10BAvx:
  17548. MOVL DI, R9
  17549. SHRL $0x10, R9
  17550. MOVB $0xf8, (AX)
  17551. MOVW DI, 1(AX)
  17552. MOVB R9, 3(AX)
  17553. ADDQ $0x04, AX
  17554. JMP memmove_match_emit_encodeSnappyBlockAsm10BAvx
  17555. three_bytes_match_emit_encodeSnappyBlockAsm10BAvx:
  17556. MOVB $0xf4, (AX)
  17557. MOVW DI, 1(AX)
  17558. ADDQ $0x03, AX
  17559. JMP memmove_match_emit_encodeSnappyBlockAsm10BAvx
  17560. two_bytes_match_emit_encodeSnappyBlockAsm10BAvx:
  17561. MOVB $0xf0, (AX)
  17562. MOVB DI, 1(AX)
  17563. ADDQ $0x02, AX
  17564. JMP memmove_match_emit_encodeSnappyBlockAsm10BAvx
  17565. one_byte_match_emit_encodeSnappyBlockAsm10BAvx:
  17566. SHLB $0x02, DI
  17567. MOVB DI, (AX)
  17568. ADDQ $0x01, AX
  17569. memmove_match_emit_encodeSnappyBlockAsm10BAvx:
  17570. LEAQ (AX)(R8*1), DI
  17571. NOP
  17572. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_tail:
  17573. TESTQ R8, R8
  17574. JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17575. CMPQ R8, $0x02
  17576. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_1or2
  17577. CMPQ R8, $0x04
  17578. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_3
  17579. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_4
  17580. CMPQ R8, $0x08
  17581. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_5through7
  17582. JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_8
  17583. CMPQ R8, $0x10
  17584. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_9through16
  17585. CMPQ R8, $0x20
  17586. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_17through32
  17587. CMPQ R8, $0x40
  17588. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_33through64
  17589. CMPQ R8, $0x80
  17590. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_65through128
  17591. CMPQ R8, $0x00000100
  17592. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_129through256
  17593. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned
  17594. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_1or2:
  17595. MOVB (SI), R9
  17596. MOVB -1(SI)(R8*1), R10
  17597. MOVB R9, (AX)
  17598. MOVB R10, -1(AX)(R8*1)
  17599. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17600. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_4:
  17601. MOVL (SI), R9
  17602. MOVL R9, (AX)
  17603. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17604. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_3:
  17605. MOVW (SI), R9
  17606. MOVB 2(SI), R10
  17607. MOVW R9, (AX)
  17608. MOVB R10, 2(AX)
  17609. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17610. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_5through7:
  17611. MOVL (SI), R9
  17612. MOVL -4(SI)(R8*1), R10
  17613. MOVL R9, (AX)
  17614. MOVL R10, -4(AX)(R8*1)
  17615. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17616. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_8:
  17617. MOVQ (SI), R9
  17618. MOVQ R9, (AX)
  17619. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17620. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_9through16:
  17621. MOVQ (SI), R9
  17622. MOVQ -8(SI)(R8*1), R10
  17623. MOVQ R9, (AX)
  17624. MOVQ R10, -8(AX)(R8*1)
  17625. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17626. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_17through32:
  17627. MOVOU (SI), X0
  17628. MOVOU -16(SI)(R8*1), X1
  17629. MOVOU X0, (AX)
  17630. MOVOU X1, -16(AX)(R8*1)
  17631. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17632. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_33through64:
  17633. MOVOU (SI), X0
  17634. MOVOU 16(SI), X1
  17635. MOVOU -32(SI)(R8*1), X2
  17636. MOVOU -16(SI)(R8*1), X3
  17637. MOVOU X0, (AX)
  17638. MOVOU X1, 16(AX)
  17639. MOVOU X2, -32(AX)(R8*1)
  17640. MOVOU X3, -16(AX)(R8*1)
  17641. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17642. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_65through128:
  17643. MOVOU (SI), X0
  17644. MOVOU 16(SI), X1
  17645. MOVOU 32(SI), X2
  17646. MOVOU 48(SI), X3
  17647. MOVOU -64(SI)(R8*1), X12
  17648. MOVOU -48(SI)(R8*1), X13
  17649. MOVOU -32(SI)(R8*1), X14
  17650. MOVOU -16(SI)(R8*1), X15
  17651. MOVOU X0, (AX)
  17652. MOVOU X1, 16(AX)
  17653. MOVOU X2, 32(AX)
  17654. MOVOU X3, 48(AX)
  17655. MOVOU X12, -64(AX)(R8*1)
  17656. MOVOU X13, -48(AX)(R8*1)
  17657. MOVOU X14, -32(AX)(R8*1)
  17658. MOVOU X15, -16(AX)(R8*1)
  17659. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17660. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_129through256:
  17661. MOVOU (SI), X0
  17662. MOVOU 16(SI), X1
  17663. MOVOU 32(SI), X2
  17664. MOVOU 48(SI), X3
  17665. MOVOU 64(SI), X4
  17666. MOVOU 80(SI), X5
  17667. MOVOU 96(SI), X6
  17668. MOVOU 112(SI), X7
  17669. MOVOU -128(SI)(R8*1), X8
  17670. MOVOU -112(SI)(R8*1), X9
  17671. MOVOU -96(SI)(R8*1), X10
  17672. MOVOU -80(SI)(R8*1), X11
  17673. MOVOU -64(SI)(R8*1), X12
  17674. MOVOU -48(SI)(R8*1), X13
  17675. MOVOU -32(SI)(R8*1), X14
  17676. MOVOU -16(SI)(R8*1), X15
  17677. MOVOU X0, (AX)
  17678. MOVOU X1, 16(AX)
  17679. MOVOU X2, 32(AX)
  17680. MOVOU X3, 48(AX)
  17681. MOVOU X4, 64(AX)
  17682. MOVOU X5, 80(AX)
  17683. MOVOU X6, 96(AX)
  17684. MOVOU X7, 112(AX)
  17685. MOVOU X8, -128(AX)(R8*1)
  17686. MOVOU X9, -112(AX)(R8*1)
  17687. MOVOU X10, -96(AX)(R8*1)
  17688. MOVOU X11, -80(AX)(R8*1)
  17689. MOVOU X12, -64(AX)(R8*1)
  17690. MOVOU X13, -48(AX)(R8*1)
  17691. MOVOU X14, -32(AX)(R8*1)
  17692. MOVOU X15, -16(AX)(R8*1)
  17693. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx
  17694. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048:
  17695. LEAQ -256(R8), R8
  17696. MOVOU (SI), X0
  17697. MOVOU 16(SI), X1
  17698. MOVOU 32(SI), X2
  17699. MOVOU 48(SI), X3
  17700. MOVOU 64(SI), X4
  17701. MOVOU 80(SI), X5
  17702. MOVOU 96(SI), X6
  17703. MOVOU 112(SI), X7
  17704. MOVOU 128(SI), X8
  17705. MOVOU 144(SI), X9
  17706. MOVOU 160(SI), X10
  17707. MOVOU 176(SI), X11
  17708. MOVOU 192(SI), X12
  17709. MOVOU 208(SI), X13
  17710. MOVOU 224(SI), X14
  17711. MOVOU 240(SI), X15
  17712. MOVOU X0, (AX)
  17713. MOVOU X1, 16(AX)
  17714. MOVOU X2, 32(AX)
  17715. MOVOU X3, 48(AX)
  17716. MOVOU X4, 64(AX)
  17717. MOVOU X5, 80(AX)
  17718. MOVOU X6, 96(AX)
  17719. MOVOU X7, 112(AX)
  17720. MOVOU X8, 128(AX)
  17721. MOVOU X9, 144(AX)
  17722. MOVOU X10, 160(AX)
  17723. MOVOU X11, 176(AX)
  17724. MOVOU X12, 192(AX)
  17725. MOVOU X13, 208(AX)
  17726. MOVOU X14, 224(AX)
  17727. MOVOU X15, 240(AX)
  17728. CMPQ R8, $0x00000100
  17729. LEAQ 256(SI), SI
  17730. LEAQ 256(AX), AX
  17731. JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048
  17732. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_tail
  17733. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned:
  17734. LEAQ (SI)(R8*1), R10
  17735. MOVQ AX, R12
  17736. MOVOU -128(R10), X5
  17737. MOVOU -112(R10), X6
  17738. MOVQ $0x00000080, R9
  17739. ANDQ $0xffffffe0, AX
  17740. ADDQ $0x20, AX
  17741. MOVOU -96(R10), X7
  17742. MOVOU -80(R10), X8
  17743. MOVQ AX, R11
  17744. SUBQ R12, R11
  17745. MOVOU -64(R10), X9
  17746. MOVOU -48(R10), X10
  17747. SUBQ R11, R8
  17748. MOVOU -32(R10), X11
  17749. MOVOU -16(R10), X12
  17750. VMOVDQU (SI), Y4
  17751. ADDQ R11, SI
  17752. SUBQ R9, R8
  17753. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop:
  17754. VMOVDQU (SI), Y0
  17755. VMOVDQU 32(SI), Y1
  17756. VMOVDQU 64(SI), Y2
  17757. VMOVDQU 96(SI), Y3
  17758. ADDQ R9, SI
  17759. VMOVDQA Y0, (AX)
  17760. VMOVDQA Y1, 32(AX)
  17761. VMOVDQA Y2, 64(AX)
  17762. VMOVDQA Y3, 96(AX)
  17763. ADDQ R9, AX
  17764. SUBQ R9, R8
  17765. JA emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop
  17766. ADDQ R9, R8
  17767. ADDQ AX, R8
  17768. VMOVDQU Y4, (R12)
  17769. VZEROUPPER
  17770. MOVOU X5, -128(R8)
  17771. MOVOU X6, -112(R8)
  17772. MOVOU X7, -96(R8)
  17773. MOVOU X8, -80(R8)
  17774. MOVOU X9, -64(R8)
  17775. MOVOU X10, -48(R8)
  17776. MOVOU X11, -32(R8)
  17777. MOVOU X12, -16(R8)
  17778. memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx:
  17779. MOVQ DI, AX
  17780. emit_literal_done_match_emit_encodeSnappyBlockAsm10BAvx:
  17781. match_nolit_loop_encodeSnappyBlockAsm10BAvx:
  17782. MOVL CX, SI
  17783. SUBL BP, SI
  17784. MOVL SI, 16(SP)
  17785. ADDL $0x04, CX
  17786. ADDL $0x04, BP
  17787. MOVQ src_len+32(FP), SI
  17788. SUBL CX, SI
  17789. LEAQ (DX)(CX*1), DI
  17790. LEAQ (DX)(BP*1), BP
  17791. XORL R9, R9
  17792. CMPL SI, $0x08
  17793. JL matchlen_single_match_nolit_encodeSnappyBlockAsm10BAvx
  17794. matchlen_loopback_match_nolit_encodeSnappyBlockAsm10BAvx:
  17795. MOVQ (DI)(R9*1), R8
  17796. XORQ (BP)(R9*1), R8
  17797. TESTQ R8, R8
  17798. JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10BAvx
  17799. BSFQ R8, R8
  17800. SARQ $0x03, R8
  17801. LEAL (R9)(R8*1), R9
  17802. JMP match_nolit_end_encodeSnappyBlockAsm10BAvx
  17803. matchlen_loop_match_nolit_encodeSnappyBlockAsm10BAvx:
  17804. LEAL -8(SI), SI
  17805. LEAL 8(R9), R9
  17806. CMPL SI, $0x08
  17807. JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10BAvx
  17808. matchlen_single_match_nolit_encodeSnappyBlockAsm10BAvx:
  17809. TESTL SI, SI
  17810. JZ match_nolit_end_encodeSnappyBlockAsm10BAvx
  17811. matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10BAvx:
  17812. MOVB (DI)(R9*1), R8
  17813. CMPB (BP)(R9*1), R8
  17814. JNE match_nolit_end_encodeSnappyBlockAsm10BAvx
  17815. LEAL 1(R9), R9
  17816. DECL SI
  17817. JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10BAvx
  17818. match_nolit_end_encodeSnappyBlockAsm10BAvx:
  17819. ADDL R9, CX
  17820. MOVL 16(SP), BP
  17821. ADDL $0x04, R9
  17822. CMPL BP, $0x00010000
  17823. JL two_byte_offset_match_nolit_encodeSnappyBlockAsm10BAvx
  17824. four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10BAvx:
  17825. CMPL R9, $0x40
  17826. JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm10BAvx
  17827. MOVB $0xff, (AX)
  17828. MOVL BP, 1(AX)
  17829. LEAL -64(R9), R9
  17830. ADDQ $0x05, AX
  17831. CMPL R9, $0x04
  17832. JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm10BAvx
  17833. JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10BAvx
  17834. four_bytes_remain_match_nolit_encodeSnappyBlockAsm10BAvx:
  17835. TESTL R9, R9
  17836. JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm10BAvx
  17837. MOVB $0x03, BL
  17838. LEAL -4(BX)(R9*4), R9
  17839. MOVB R9, (AX)
  17840. MOVL BP, 1(AX)
  17841. ADDQ $0x05, AX
  17842. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10BAvx
  17843. two_byte_offset_match_nolit_encodeSnappyBlockAsm10BAvx:
  17844. CMPL R9, $0x40
  17845. JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10BAvx
  17846. MOVB $0xee, (AX)
  17847. MOVW BP, 1(AX)
  17848. LEAL -60(R9), R9
  17849. ADDQ $0x03, AX
  17850. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10BAvx
  17851. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10BAvx:
  17852. CMPL R9, $0x0c
  17853. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10BAvx
  17854. CMPL BP, $0x00000800
  17855. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10BAvx
  17856. MOVB $0x01, BL
  17857. LEAL -16(BX)(R9*4), R9
  17858. MOVB BP, 1(AX)
  17859. SHRL $0x08, BP
  17860. SHLL $0x05, BP
  17861. ORL BP, R9
  17862. MOVB R9, (AX)
  17863. ADDQ $0x02, AX
  17864. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10BAvx
  17865. emit_copy_three_match_nolit_encodeSnappyBlockAsm10BAvx:
  17866. MOVB $0x02, BL
  17867. LEAL -4(BX)(R9*4), R9
  17868. MOVB R9, (AX)
  17869. MOVW BP, 1(AX)
  17870. ADDQ $0x03, AX
  17871. match_nolit_emitcopy_end_encodeSnappyBlockAsm10BAvx:
  17872. MOVL CX, 12(SP)
  17873. CMPL CX, 8(SP)
  17874. JGE emit_remainder_encodeSnappyBlockAsm10BAvx
  17875. CMPQ AX, (SP)
  17876. JL match_nolit_dst_ok_encodeSnappyBlockAsm10BAvx
  17877. MOVQ $0x00000000, ret+48(FP)
  17878. RET
  17879. match_nolit_dst_ok_encodeSnappyBlockAsm10BAvx:
  17880. MOVQ -2(DX)(CX*1), SI
  17881. MOVQ $0x9e3779b1, BP
  17882. MOVQ SI, DI
  17883. SHRQ $0x10, SI
  17884. MOVQ SI, R8
  17885. SHLQ $0x20, DI
  17886. IMULQ BP, DI
  17887. SHRQ $0x36, DI
  17888. SHLQ $0x20, R8
  17889. IMULQ BP, R8
  17890. SHRQ $0x36, R8
  17891. LEAL -2(CX), R9
  17892. MOVL 24(SP)(R8*4), BP
  17893. MOVL R9, 24(SP)(DI*4)
  17894. MOVL CX, 24(SP)(R8*4)
  17895. CMPL (DX)(BP*1), SI
  17896. JEQ match_nolit_loop_encodeSnappyBlockAsm10BAvx
  17897. INCL CX
  17898. JMP search_loop_encodeSnappyBlockAsm10BAvx
  17899. emit_remainder_encodeSnappyBlockAsm10BAvx:
  17900. MOVQ src_len+32(FP), CX
  17901. SUBL 12(SP), CX
  17902. LEAQ 4(AX)(CX*1), CX
  17903. CMPQ CX, (SP)
  17904. JL emit_remainder_ok_encodeSnappyBlockAsm10BAvx
  17905. MOVQ $0x00000000, ret+48(FP)
  17906. RET
  17907. emit_remainder_ok_encodeSnappyBlockAsm10BAvx:
  17908. MOVQ src_len+32(FP), CX
  17909. MOVL 12(SP), BX
  17910. CMPL BX, CX
  17911. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10BAvx
  17912. MOVL CX, BP
  17913. MOVL CX, 12(SP)
  17914. LEAQ (DX)(BX*1), CX
  17915. SUBL BX, BP
  17916. MOVL BP, DX
  17917. SUBL $0x01, DX
  17918. JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm10BAvx
  17919. CMPL DX, $0x3c
  17920. JLT one_byte_emit_remainder_encodeSnappyBlockAsm10BAvx
  17921. CMPL DX, $0x00000100
  17922. JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx
  17923. CMPL DX, $0x00010000
  17924. JLT three_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx
  17925. CMPL DX, $0x01000000
  17926. JLT four_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx
  17927. MOVB $0xfc, (AX)
  17928. MOVL DX, 1(AX)
  17929. ADDQ $0x05, AX
  17930. JMP memmove_emit_remainder_encodeSnappyBlockAsm10BAvx
  17931. four_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx:
  17932. MOVL DX, BX
  17933. SHRL $0x10, BX
  17934. MOVB $0xf8, (AX)
  17935. MOVW DX, 1(AX)
  17936. MOVB BL, 3(AX)
  17937. ADDQ $0x04, AX
  17938. JMP memmove_emit_remainder_encodeSnappyBlockAsm10BAvx
  17939. three_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx:
  17940. MOVB $0xf4, (AX)
  17941. MOVW DX, 1(AX)
  17942. ADDQ $0x03, AX
  17943. JMP memmove_emit_remainder_encodeSnappyBlockAsm10BAvx
  17944. two_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx:
  17945. MOVB $0xf0, (AX)
  17946. MOVB DL, 1(AX)
  17947. ADDQ $0x02, AX
  17948. JMP memmove_emit_remainder_encodeSnappyBlockAsm10BAvx
  17949. one_byte_emit_remainder_encodeSnappyBlockAsm10BAvx:
  17950. SHLB $0x02, DL
  17951. MOVB DL, (AX)
  17952. ADDQ $0x01, AX
  17953. memmove_emit_remainder_encodeSnappyBlockAsm10BAvx:
  17954. LEAQ (AX)(BP*1), DX
  17955. MOVL BP, BX
  17956. NOP
  17957. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_tail:
  17958. TESTQ BX, BX
  17959. JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  17960. CMPQ BX, $0x02
  17961. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_1or2
  17962. CMPQ BX, $0x04
  17963. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_3
  17964. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_4
  17965. CMPQ BX, $0x08
  17966. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_5through7
  17967. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_8
  17968. CMPQ BX, $0x10
  17969. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_9through16
  17970. CMPQ BX, $0x20
  17971. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_17through32
  17972. CMPQ BX, $0x40
  17973. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_33through64
  17974. CMPQ BX, $0x80
  17975. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_65through128
  17976. CMPQ BX, $0x00000100
  17977. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_129through256
  17978. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned
  17979. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_1or2:
  17980. MOVB (CX), BP
  17981. MOVB -1(CX)(BX*1), SI
  17982. MOVB BP, (AX)
  17983. MOVB SI, -1(AX)(BX*1)
  17984. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  17985. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_4:
  17986. MOVL (CX), BP
  17987. MOVL BP, (AX)
  17988. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  17989. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_3:
  17990. MOVW (CX), BP
  17991. MOVB 2(CX), SI
  17992. MOVW BP, (AX)
  17993. MOVB SI, 2(AX)
  17994. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  17995. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_5through7:
  17996. MOVL (CX), BP
  17997. MOVL -4(CX)(BX*1), SI
  17998. MOVL BP, (AX)
  17999. MOVL SI, -4(AX)(BX*1)
  18000. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  18001. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_8:
  18002. MOVQ (CX), BP
  18003. MOVQ BP, (AX)
  18004. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  18005. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_9through16:
  18006. MOVQ (CX), BP
  18007. MOVQ -8(CX)(BX*1), SI
  18008. MOVQ BP, (AX)
  18009. MOVQ SI, -8(AX)(BX*1)
  18010. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  18011. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_17through32:
  18012. MOVOU (CX), X0
  18013. MOVOU -16(CX)(BX*1), X1
  18014. MOVOU X0, (AX)
  18015. MOVOU X1, -16(AX)(BX*1)
  18016. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  18017. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_33through64:
  18018. MOVOU (CX), X0
  18019. MOVOU 16(CX), X1
  18020. MOVOU -32(CX)(BX*1), X2
  18021. MOVOU -16(CX)(BX*1), X3
  18022. MOVOU X0, (AX)
  18023. MOVOU X1, 16(AX)
  18024. MOVOU X2, -32(AX)(BX*1)
  18025. MOVOU X3, -16(AX)(BX*1)
  18026. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  18027. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_65through128:
  18028. MOVOU (CX), X0
  18029. MOVOU 16(CX), X1
  18030. MOVOU 32(CX), X2
  18031. MOVOU 48(CX), X3
  18032. MOVOU -64(CX)(BX*1), X12
  18033. MOVOU -48(CX)(BX*1), X13
  18034. MOVOU -32(CX)(BX*1), X14
  18035. MOVOU -16(CX)(BX*1), X15
  18036. MOVOU X0, (AX)
  18037. MOVOU X1, 16(AX)
  18038. MOVOU X2, 32(AX)
  18039. MOVOU X3, 48(AX)
  18040. MOVOU X12, -64(AX)(BX*1)
  18041. MOVOU X13, -48(AX)(BX*1)
  18042. MOVOU X14, -32(AX)(BX*1)
  18043. MOVOU X15, -16(AX)(BX*1)
  18044. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  18045. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_129through256:
  18046. MOVOU (CX), X0
  18047. MOVOU 16(CX), X1
  18048. MOVOU 32(CX), X2
  18049. MOVOU 48(CX), X3
  18050. MOVOU 64(CX), X4
  18051. MOVOU 80(CX), X5
  18052. MOVOU 96(CX), X6
  18053. MOVOU 112(CX), X7
  18054. MOVOU -128(CX)(BX*1), X8
  18055. MOVOU -112(CX)(BX*1), X9
  18056. MOVOU -96(CX)(BX*1), X10
  18057. MOVOU -80(CX)(BX*1), X11
  18058. MOVOU -64(CX)(BX*1), X12
  18059. MOVOU -48(CX)(BX*1), X13
  18060. MOVOU -32(CX)(BX*1), X14
  18061. MOVOU -16(CX)(BX*1), X15
  18062. MOVOU X0, (AX)
  18063. MOVOU X1, 16(AX)
  18064. MOVOU X2, 32(AX)
  18065. MOVOU X3, 48(AX)
  18066. MOVOU X4, 64(AX)
  18067. MOVOU X5, 80(AX)
  18068. MOVOU X6, 96(AX)
  18069. MOVOU X7, 112(AX)
  18070. MOVOU X8, -128(AX)(BX*1)
  18071. MOVOU X9, -112(AX)(BX*1)
  18072. MOVOU X10, -96(AX)(BX*1)
  18073. MOVOU X11, -80(AX)(BX*1)
  18074. MOVOU X12, -64(AX)(BX*1)
  18075. MOVOU X13, -48(AX)(BX*1)
  18076. MOVOU X14, -32(AX)(BX*1)
  18077. MOVOU X15, -16(AX)(BX*1)
  18078. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx
  18079. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048:
  18080. LEAQ -256(BX), BX
  18081. MOVOU (CX), X0
  18082. MOVOU 16(CX), X1
  18083. MOVOU 32(CX), X2
  18084. MOVOU 48(CX), X3
  18085. MOVOU 64(CX), X4
  18086. MOVOU 80(CX), X5
  18087. MOVOU 96(CX), X6
  18088. MOVOU 112(CX), X7
  18089. MOVOU 128(CX), X8
  18090. MOVOU 144(CX), X9
  18091. MOVOU 160(CX), X10
  18092. MOVOU 176(CX), X11
  18093. MOVOU 192(CX), X12
  18094. MOVOU 208(CX), X13
  18095. MOVOU 224(CX), X14
  18096. MOVOU 240(CX), X15
  18097. MOVOU X0, (AX)
  18098. MOVOU X1, 16(AX)
  18099. MOVOU X2, 32(AX)
  18100. MOVOU X3, 48(AX)
  18101. MOVOU X4, 64(AX)
  18102. MOVOU X5, 80(AX)
  18103. MOVOU X6, 96(AX)
  18104. MOVOU X7, 112(AX)
  18105. MOVOU X8, 128(AX)
  18106. MOVOU X9, 144(AX)
  18107. MOVOU X10, 160(AX)
  18108. MOVOU X11, 176(AX)
  18109. MOVOU X12, 192(AX)
  18110. MOVOU X13, 208(AX)
  18111. MOVOU X14, 224(AX)
  18112. MOVOU X15, 240(AX)
  18113. CMPQ BX, $0x00000100
  18114. LEAQ 256(CX), CX
  18115. LEAQ 256(AX), AX
  18116. JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048
  18117. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_tail
  18118. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned:
  18119. LEAQ (CX)(BX*1), SI
  18120. MOVQ AX, R8
  18121. MOVOU -128(SI), X5
  18122. MOVOU -112(SI), X6
  18123. MOVQ $0x00000080, BP
  18124. ANDQ $0xffffffe0, AX
  18125. ADDQ $0x20, AX
  18126. MOVOU -96(SI), X7
  18127. MOVOU -80(SI), X8
  18128. MOVQ AX, DI
  18129. SUBQ R8, DI
  18130. MOVOU -64(SI), X9
  18131. MOVOU -48(SI), X10
  18132. SUBQ DI, BX
  18133. MOVOU -32(SI), X11
  18134. MOVOU -16(SI), X12
  18135. VMOVDQU (CX), Y4
  18136. ADDQ DI, CX
  18137. SUBQ BP, BX
  18138. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop:
  18139. VMOVDQU (CX), Y0
  18140. VMOVDQU 32(CX), Y1
  18141. VMOVDQU 64(CX), Y2
  18142. VMOVDQU 96(CX), Y3
  18143. ADDQ BP, CX
  18144. VMOVDQA Y0, (AX)
  18145. VMOVDQA Y1, 32(AX)
  18146. VMOVDQA Y2, 64(AX)
  18147. VMOVDQA Y3, 96(AX)
  18148. ADDQ BP, AX
  18149. SUBQ BP, BX
  18150. JA emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop
  18151. ADDQ BP, BX
  18152. ADDQ AX, BX
  18153. VMOVDQU Y4, (R8)
  18154. VZEROUPPER
  18155. MOVOU X5, -128(BX)
  18156. MOVOU X6, -112(BX)
  18157. MOVOU X7, -96(BX)
  18158. MOVOU X8, -80(BX)
  18159. MOVOU X9, -64(BX)
  18160. MOVOU X10, -48(BX)
  18161. MOVOU X11, -32(BX)
  18162. MOVOU X12, -16(BX)
  18163. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx:
  18164. MOVQ DX, AX
  18165. emit_literal_done_emit_remainder_encodeSnappyBlockAsm10BAvx:
  18166. MOVQ dst_base+0(FP), CX
  18167. SUBQ CX, AX
  18168. MOVQ AX, ret+48(FP)
  18169. RET
  18170. // func encodeSnappyBlockAsm8BAvx(dst []byte, src []byte) int
  18171. // Requires: AVX, SSE2
  18172. TEXT ·encodeSnappyBlockAsm8BAvx(SB), $1048-56
  18173. MOVQ dst_base+0(FP), AX
  18174. MOVQ $0x00000008, CX
  18175. LEAQ 24(SP), DX
  18176. PXOR X0, X0
  18177. zero_loop_encodeSnappyBlockAsm8BAvx:
  18178. MOVOU X0, (DX)
  18179. MOVOU X0, 16(DX)
  18180. MOVOU X0, 32(DX)
  18181. MOVOU X0, 48(DX)
  18182. MOVOU X0, 64(DX)
  18183. MOVOU X0, 80(DX)
  18184. MOVOU X0, 96(DX)
  18185. MOVOU X0, 112(DX)
  18186. ADDQ $0x80, DX
  18187. DECQ CX
  18188. JNZ zero_loop_encodeSnappyBlockAsm8BAvx
  18189. MOVL $0x00000000, 12(SP)
  18190. MOVQ src_len+32(FP), CX
  18191. LEAQ -5(CX), DX
  18192. LEAQ -8(CX), BP
  18193. MOVL BP, 8(SP)
  18194. SHRQ $0x05, CX
  18195. SUBL CX, DX
  18196. LEAQ (AX)(DX*1), DX
  18197. MOVQ DX, (SP)
  18198. MOVL $0x00000001, CX
  18199. MOVL CX, 16(SP)
  18200. MOVQ src_base+24(FP), DX
  18201. search_loop_encodeSnappyBlockAsm8BAvx:
  18202. MOVQ (DX)(CX*1), SI
  18203. MOVL CX, BP
  18204. SUBL 12(SP), BP
  18205. SHRL $0x04, BP
  18206. LEAL 4(CX)(BP*1), BP
  18207. MOVL 8(SP), DI
  18208. CMPL BP, DI
  18209. JGT emit_remainder_encodeSnappyBlockAsm8BAvx
  18210. MOVL BP, 20(SP)
  18211. MOVQ $0x9e3779b1, R8
  18212. MOVQ SI, R9
  18213. MOVQ SI, R10
  18214. SHRQ $0x08, R10
  18215. SHLQ $0x20, R9
  18216. IMULQ R8, R9
  18217. SHRQ $0x38, R9
  18218. SHLQ $0x20, R10
  18219. IMULQ R8, R10
  18220. SHRQ $0x38, R10
  18221. MOVL 24(SP)(R9*4), BP
  18222. MOVL 24(SP)(R10*4), DI
  18223. MOVL CX, 24(SP)(R9*4)
  18224. LEAL 1(CX), R9
  18225. MOVL R9, 24(SP)(R10*4)
  18226. MOVQ SI, R9
  18227. SHRQ $0x10, R9
  18228. SHLQ $0x20, R9
  18229. IMULQ R8, R9
  18230. SHRQ $0x38, R9
  18231. MOVL CX, R8
  18232. SUBL 16(SP), R8
  18233. MOVL 1(DX)(R8*1), R10
  18234. MOVQ SI, R8
  18235. SHRQ $0x08, R8
  18236. CMPL R8, R10
  18237. JNE no_repeat_found_encodeSnappyBlockAsm8BAvx
  18238. LEAL 1(CX), SI
  18239. MOVL 12(SP), BP
  18240. MOVL SI, DI
  18241. SUBL 16(SP), DI
  18242. JZ repeat_extend_back_end_encodeSnappyBlockAsm8BAvx
  18243. repeat_extend_back_loop_encodeSnappyBlockAsm8BAvx:
  18244. CMPL SI, BP
  18245. JLE repeat_extend_back_end_encodeSnappyBlockAsm8BAvx
  18246. MOVB -1(DX)(DI*1), BL
  18247. MOVB -1(DX)(SI*1), R8
  18248. CMPB BL, R8
  18249. JNE repeat_extend_back_end_encodeSnappyBlockAsm8BAvx
  18250. LEAL -1(SI), SI
  18251. DECL DI
  18252. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8BAvx
  18253. repeat_extend_back_end_encodeSnappyBlockAsm8BAvx:
  18254. MOVL 12(SP), BP
  18255. CMPL BP, SI
  18256. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8BAvx
  18257. MOVL SI, DI
  18258. MOVL SI, 12(SP)
  18259. LEAQ (DX)(BP*1), R8
  18260. SUBL BP, DI
  18261. MOVL DI, BP
  18262. SUBL $0x01, BP
  18263. JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm8BAvx
  18264. CMPL BP, $0x3c
  18265. JLT one_byte_repeat_emit_encodeSnappyBlockAsm8BAvx
  18266. CMPL BP, $0x00000100
  18267. JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx
  18268. CMPL BP, $0x00010000
  18269. JLT three_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx
  18270. CMPL BP, $0x01000000
  18271. JLT four_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx
  18272. MOVB $0xfc, (AX)
  18273. MOVL BP, 1(AX)
  18274. ADDQ $0x05, AX
  18275. JMP memmove_repeat_emit_encodeSnappyBlockAsm8BAvx
  18276. four_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx:
  18277. MOVL BP, R9
  18278. SHRL $0x10, R9
  18279. MOVB $0xf8, (AX)
  18280. MOVW BP, 1(AX)
  18281. MOVB R9, 3(AX)
  18282. ADDQ $0x04, AX
  18283. JMP memmove_repeat_emit_encodeSnappyBlockAsm8BAvx
  18284. three_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx:
  18285. MOVB $0xf4, (AX)
  18286. MOVW BP, 1(AX)
  18287. ADDQ $0x03, AX
  18288. JMP memmove_repeat_emit_encodeSnappyBlockAsm8BAvx
  18289. two_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx:
  18290. MOVB $0xf0, (AX)
  18291. MOVB BP, 1(AX)
  18292. ADDQ $0x02, AX
  18293. JMP memmove_repeat_emit_encodeSnappyBlockAsm8BAvx
  18294. one_byte_repeat_emit_encodeSnappyBlockAsm8BAvx:
  18295. SHLB $0x02, BP
  18296. MOVB BP, (AX)
  18297. ADDQ $0x01, AX
  18298. memmove_repeat_emit_encodeSnappyBlockAsm8BAvx:
  18299. LEAQ (AX)(DI*1), BP
  18300. NOP
  18301. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_tail:
  18302. TESTQ DI, DI
  18303. JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18304. CMPQ DI, $0x02
  18305. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_1or2
  18306. CMPQ DI, $0x04
  18307. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_3
  18308. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_4
  18309. CMPQ DI, $0x08
  18310. JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_5through7
  18311. JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_8
  18312. CMPQ DI, $0x10
  18313. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_9through16
  18314. CMPQ DI, $0x20
  18315. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_17through32
  18316. CMPQ DI, $0x40
  18317. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_33through64
  18318. CMPQ DI, $0x80
  18319. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_65through128
  18320. CMPQ DI, $0x00000100
  18321. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_129through256
  18322. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned
  18323. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_1or2:
  18324. MOVB (R8), R9
  18325. MOVB -1(R8)(DI*1), R10
  18326. MOVB R9, (AX)
  18327. MOVB R10, -1(AX)(DI*1)
  18328. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18329. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_4:
  18330. MOVL (R8), R9
  18331. MOVL R9, (AX)
  18332. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18333. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_3:
  18334. MOVW (R8), R9
  18335. MOVB 2(R8), R10
  18336. MOVW R9, (AX)
  18337. MOVB R10, 2(AX)
  18338. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18339. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_5through7:
  18340. MOVL (R8), R9
  18341. MOVL -4(R8)(DI*1), R10
  18342. MOVL R9, (AX)
  18343. MOVL R10, -4(AX)(DI*1)
  18344. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18345. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_8:
  18346. MOVQ (R8), R9
  18347. MOVQ R9, (AX)
  18348. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18349. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_9through16:
  18350. MOVQ (R8), R9
  18351. MOVQ -8(R8)(DI*1), R10
  18352. MOVQ R9, (AX)
  18353. MOVQ R10, -8(AX)(DI*1)
  18354. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18355. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_17through32:
  18356. MOVOU (R8), X0
  18357. MOVOU -16(R8)(DI*1), X1
  18358. MOVOU X0, (AX)
  18359. MOVOU X1, -16(AX)(DI*1)
  18360. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18361. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_33through64:
  18362. MOVOU (R8), X0
  18363. MOVOU 16(R8), X1
  18364. MOVOU -32(R8)(DI*1), X2
  18365. MOVOU -16(R8)(DI*1), X3
  18366. MOVOU X0, (AX)
  18367. MOVOU X1, 16(AX)
  18368. MOVOU X2, -32(AX)(DI*1)
  18369. MOVOU X3, -16(AX)(DI*1)
  18370. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18371. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_65through128:
  18372. MOVOU (R8), X0
  18373. MOVOU 16(R8), X1
  18374. MOVOU 32(R8), X2
  18375. MOVOU 48(R8), X3
  18376. MOVOU -64(R8)(DI*1), X12
  18377. MOVOU -48(R8)(DI*1), X13
  18378. MOVOU -32(R8)(DI*1), X14
  18379. MOVOU -16(R8)(DI*1), X15
  18380. MOVOU X0, (AX)
  18381. MOVOU X1, 16(AX)
  18382. MOVOU X2, 32(AX)
  18383. MOVOU X3, 48(AX)
  18384. MOVOU X12, -64(AX)(DI*1)
  18385. MOVOU X13, -48(AX)(DI*1)
  18386. MOVOU X14, -32(AX)(DI*1)
  18387. MOVOU X15, -16(AX)(DI*1)
  18388. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18389. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_129through256:
  18390. MOVOU (R8), X0
  18391. MOVOU 16(R8), X1
  18392. MOVOU 32(R8), X2
  18393. MOVOU 48(R8), X3
  18394. MOVOU 64(R8), X4
  18395. MOVOU 80(R8), X5
  18396. MOVOU 96(R8), X6
  18397. MOVOU 112(R8), X7
  18398. MOVOU -128(R8)(DI*1), X8
  18399. MOVOU -112(R8)(DI*1), X9
  18400. MOVOU -96(R8)(DI*1), X10
  18401. MOVOU -80(R8)(DI*1), X11
  18402. MOVOU -64(R8)(DI*1), X12
  18403. MOVOU -48(R8)(DI*1), X13
  18404. MOVOU -32(R8)(DI*1), X14
  18405. MOVOU -16(R8)(DI*1), X15
  18406. MOVOU X0, (AX)
  18407. MOVOU X1, 16(AX)
  18408. MOVOU X2, 32(AX)
  18409. MOVOU X3, 48(AX)
  18410. MOVOU X4, 64(AX)
  18411. MOVOU X5, 80(AX)
  18412. MOVOU X6, 96(AX)
  18413. MOVOU X7, 112(AX)
  18414. MOVOU X8, -128(AX)(DI*1)
  18415. MOVOU X9, -112(AX)(DI*1)
  18416. MOVOU X10, -96(AX)(DI*1)
  18417. MOVOU X11, -80(AX)(DI*1)
  18418. MOVOU X12, -64(AX)(DI*1)
  18419. MOVOU X13, -48(AX)(DI*1)
  18420. MOVOU X14, -32(AX)(DI*1)
  18421. MOVOU X15, -16(AX)(DI*1)
  18422. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx
  18423. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048:
  18424. LEAQ -256(DI), DI
  18425. MOVOU (R8), X0
  18426. MOVOU 16(R8), X1
  18427. MOVOU 32(R8), X2
  18428. MOVOU 48(R8), X3
  18429. MOVOU 64(R8), X4
  18430. MOVOU 80(R8), X5
  18431. MOVOU 96(R8), X6
  18432. MOVOU 112(R8), X7
  18433. MOVOU 128(R8), X8
  18434. MOVOU 144(R8), X9
  18435. MOVOU 160(R8), X10
  18436. MOVOU 176(R8), X11
  18437. MOVOU 192(R8), X12
  18438. MOVOU 208(R8), X13
  18439. MOVOU 224(R8), X14
  18440. MOVOU 240(R8), X15
  18441. MOVOU X0, (AX)
  18442. MOVOU X1, 16(AX)
  18443. MOVOU X2, 32(AX)
  18444. MOVOU X3, 48(AX)
  18445. MOVOU X4, 64(AX)
  18446. MOVOU X5, 80(AX)
  18447. MOVOU X6, 96(AX)
  18448. MOVOU X7, 112(AX)
  18449. MOVOU X8, 128(AX)
  18450. MOVOU X9, 144(AX)
  18451. MOVOU X10, 160(AX)
  18452. MOVOU X11, 176(AX)
  18453. MOVOU X12, 192(AX)
  18454. MOVOU X13, 208(AX)
  18455. MOVOU X14, 224(AX)
  18456. MOVOU X15, 240(AX)
  18457. CMPQ DI, $0x00000100
  18458. LEAQ 256(R8), R8
  18459. LEAQ 256(AX), AX
  18460. JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048
  18461. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_tail
  18462. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned:
  18463. LEAQ (R8)(DI*1), R10
  18464. MOVQ AX, R12
  18465. MOVOU -128(R10), X5
  18466. MOVOU -112(R10), X6
  18467. MOVQ $0x00000080, R9
  18468. ANDQ $0xffffffe0, AX
  18469. ADDQ $0x20, AX
  18470. MOVOU -96(R10), X7
  18471. MOVOU -80(R10), X8
  18472. MOVQ AX, R11
  18473. SUBQ R12, R11
  18474. MOVOU -64(R10), X9
  18475. MOVOU -48(R10), X10
  18476. SUBQ R11, DI
  18477. MOVOU -32(R10), X11
  18478. MOVOU -16(R10), X12
  18479. VMOVDQU (R8), Y4
  18480. ADDQ R11, R8
  18481. SUBQ R9, DI
  18482. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop:
  18483. VMOVDQU (R8), Y0
  18484. VMOVDQU 32(R8), Y1
  18485. VMOVDQU 64(R8), Y2
  18486. VMOVDQU 96(R8), Y3
  18487. ADDQ R9, R8
  18488. VMOVDQA Y0, (AX)
  18489. VMOVDQA Y1, 32(AX)
  18490. VMOVDQA Y2, 64(AX)
  18491. VMOVDQA Y3, 96(AX)
  18492. ADDQ R9, AX
  18493. SUBQ R9, DI
  18494. JA emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop
  18495. ADDQ R9, DI
  18496. ADDQ AX, DI
  18497. VMOVDQU Y4, (R12)
  18498. VZEROUPPER
  18499. MOVOU X5, -128(DI)
  18500. MOVOU X6, -112(DI)
  18501. MOVOU X7, -96(DI)
  18502. MOVOU X8, -80(DI)
  18503. MOVOU X9, -64(DI)
  18504. MOVOU X10, -48(DI)
  18505. MOVOU X11, -32(DI)
  18506. MOVOU X12, -16(DI)
  18507. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx:
  18508. MOVQ BP, AX
  18509. emit_literal_done_repeat_emit_encodeSnappyBlockAsm8BAvx:
  18510. ADDL $0x05, CX
  18511. MOVL CX, BP
  18512. SUBL 16(SP), BP
  18513. MOVQ src_len+32(FP), DI
  18514. SUBL CX, DI
  18515. LEAQ (DX)(CX*1), R8
  18516. LEAQ (DX)(BP*1), BP
  18517. XORL R10, R10
  18518. CMPL DI, $0x08
  18519. JL matchlen_single_repeat_extend
  18520. matchlen_loopback_repeat_extend:
  18521. MOVQ (R8)(R10*1), R9
  18522. XORQ (BP)(R10*1), R9
  18523. TESTQ R9, R9
  18524. JZ matchlen_loop_repeat_extend
  18525. BSFQ R9, R9
  18526. SARQ $0x03, R9
  18527. LEAL (R10)(R9*1), R10
  18528. JMP repeat_extend_forward_end_encodeSnappyBlockAsm8BAvx
  18529. matchlen_loop_repeat_extend:
  18530. LEAL -8(DI), DI
  18531. LEAL 8(R10), R10
  18532. CMPL DI, $0x08
  18533. JGE matchlen_loopback_repeat_extend
  18534. matchlen_single_repeat_extend:
  18535. TESTL DI, DI
  18536. JZ repeat_extend_forward_end_encodeSnappyBlockAsm8BAvx
  18537. matchlen_single_loopback_repeat_extend:
  18538. MOVB (R8)(R10*1), R9
  18539. CMPB (BP)(R10*1), R9
  18540. JNE repeat_extend_forward_end_encodeSnappyBlockAsm8BAvx
  18541. LEAL 1(R10), R10
  18542. DECL DI
  18543. JNZ matchlen_single_loopback_repeat_extend
  18544. repeat_extend_forward_end_encodeSnappyBlockAsm8BAvx:
  18545. ADDL R10, CX
  18546. MOVL CX, BP
  18547. SUBL SI, BP
  18548. MOVL 16(SP), SI
  18549. CMPL SI, $0x00010000
  18550. JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8BAvx
  18551. four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8BAvx:
  18552. CMPL BP, $0x40
  18553. JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8BAvx
  18554. MOVB $0xff, (AX)
  18555. MOVL SI, 1(AX)
  18556. LEAL -64(BP), BP
  18557. ADDQ $0x05, AX
  18558. CMPL BP, $0x04
  18559. JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8BAvx
  18560. JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8BAvx
  18561. four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8BAvx:
  18562. TESTL BP, BP
  18563. JZ repeat_end_emit_encodeSnappyBlockAsm8BAvx
  18564. MOVB $0x03, BL
  18565. LEAL -4(BX)(BP*4), BP
  18566. MOVB BP, (AX)
  18567. MOVL SI, 1(AX)
  18568. ADDQ $0x05, AX
  18569. JMP repeat_end_emit_encodeSnappyBlockAsm8BAvx
  18570. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8BAvx:
  18571. CMPL BP, $0x40
  18572. JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8BAvx
  18573. MOVB $0xee, (AX)
  18574. MOVW SI, 1(AX)
  18575. LEAL -60(BP), BP
  18576. ADDQ $0x03, AX
  18577. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8BAvx
  18578. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8BAvx:
  18579. CMPL BP, $0x0c
  18580. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8BAvx
  18581. CMPL SI, $0x00000800
  18582. JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8BAvx
  18583. MOVB $0x01, BL
  18584. LEAL -16(BX)(BP*4), BP
  18585. MOVB SI, 1(AX)
  18586. SHRL $0x08, SI
  18587. SHLL $0x05, SI
  18588. ORL SI, BP
  18589. MOVB BP, (AX)
  18590. ADDQ $0x02, AX
  18591. JMP repeat_end_emit_encodeSnappyBlockAsm8BAvx
  18592. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8BAvx:
  18593. MOVB $0x02, BL
  18594. LEAL -4(BX)(BP*4), BP
  18595. MOVB BP, (AX)
  18596. MOVW SI, 1(AX)
  18597. ADDQ $0x03, AX
  18598. repeat_end_emit_encodeSnappyBlockAsm8BAvx:
  18599. MOVL CX, 12(SP)
  18600. CMPL CX, 8(SP)
  18601. JGE emit_remainder_encodeSnappyBlockAsm8BAvx
  18602. JMP search_loop_encodeSnappyBlockAsm8BAvx
  18603. no_repeat_found_encodeSnappyBlockAsm8BAvx:
  18604. CMPL (DX)(BP*1), SI
  18605. JEQ candidate_match_encodeSnappyBlockAsm8BAvx
  18606. SHRQ $0x08, SI
  18607. MOVL 24(SP)(R9*4), BP
  18608. LEAL 2(CX), R8
  18609. CMPL (DX)(DI*1), SI
  18610. JEQ candidate2_match_encodeSnappyBlockAsm8BAvx
  18611. MOVL R8, 24(SP)(R9*4)
  18612. SHRQ $0x08, SI
  18613. CMPL (DX)(BP*1), SI
  18614. JEQ candidate3_match_encodeSnappyBlockAsm8BAvx
  18615. MOVL 20(SP), CX
  18616. JMP search_loop_encodeSnappyBlockAsm8BAvx
  18617. candidate3_match_encodeSnappyBlockAsm8BAvx:
  18618. ADDL $0x02, CX
  18619. JMP candidate_match_encodeSnappyBlockAsm8BAvx
  18620. candidate2_match_encodeSnappyBlockAsm8BAvx:
  18621. MOVL R8, 24(SP)(R9*4)
  18622. INCL CX
  18623. MOVL DI, BP
  18624. candidate_match_encodeSnappyBlockAsm8BAvx:
  18625. MOVL 12(SP), SI
  18626. TESTL BP, BP
  18627. JZ match_extend_back_end_encodeSnappyBlockAsm8BAvx
  18628. match_extend_back_loop_encodeSnappyBlockAsm8BAvx:
  18629. CMPL CX, SI
  18630. JLE match_extend_back_end_encodeSnappyBlockAsm8BAvx
  18631. MOVB -1(DX)(BP*1), BL
  18632. MOVB -1(DX)(CX*1), DI
  18633. CMPB BL, DI
  18634. JNE match_extend_back_end_encodeSnappyBlockAsm8BAvx
  18635. LEAL -1(CX), CX
  18636. DECL BP
  18637. JZ match_extend_back_end_encodeSnappyBlockAsm8BAvx
  18638. JMP match_extend_back_loop_encodeSnappyBlockAsm8BAvx
  18639. match_extend_back_end_encodeSnappyBlockAsm8BAvx:
  18640. MOVL CX, SI
  18641. SUBL 12(SP), SI
  18642. LEAQ 4(AX)(SI*1), SI
  18643. CMPQ SI, (SP)
  18644. JL match_dst_size_check_encodeSnappyBlockAsm8BAvx
  18645. MOVQ $0x00000000, ret+48(FP)
  18646. RET
  18647. match_dst_size_check_encodeSnappyBlockAsm8BAvx:
  18648. MOVL CX, SI
  18649. MOVL 12(SP), DI
  18650. CMPL DI, SI
  18651. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8BAvx
  18652. MOVL SI, R8
  18653. MOVL SI, 12(SP)
  18654. LEAQ (DX)(DI*1), SI
  18655. SUBL DI, R8
  18656. MOVL R8, DI
  18657. SUBL $0x01, DI
  18658. JC emit_literal_done_match_emit_encodeSnappyBlockAsm8BAvx
  18659. CMPL DI, $0x3c
  18660. JLT one_byte_match_emit_encodeSnappyBlockAsm8BAvx
  18661. CMPL DI, $0x00000100
  18662. JLT two_bytes_match_emit_encodeSnappyBlockAsm8BAvx
  18663. CMPL DI, $0x00010000
  18664. JLT three_bytes_match_emit_encodeSnappyBlockAsm8BAvx
  18665. CMPL DI, $0x01000000
  18666. JLT four_bytes_match_emit_encodeSnappyBlockAsm8BAvx
  18667. MOVB $0xfc, (AX)
  18668. MOVL DI, 1(AX)
  18669. ADDQ $0x05, AX
  18670. JMP memmove_match_emit_encodeSnappyBlockAsm8BAvx
  18671. four_bytes_match_emit_encodeSnappyBlockAsm8BAvx:
  18672. MOVL DI, R9
  18673. SHRL $0x10, R9
  18674. MOVB $0xf8, (AX)
  18675. MOVW DI, 1(AX)
  18676. MOVB R9, 3(AX)
  18677. ADDQ $0x04, AX
  18678. JMP memmove_match_emit_encodeSnappyBlockAsm8BAvx
  18679. three_bytes_match_emit_encodeSnappyBlockAsm8BAvx:
  18680. MOVB $0xf4, (AX)
  18681. MOVW DI, 1(AX)
  18682. ADDQ $0x03, AX
  18683. JMP memmove_match_emit_encodeSnappyBlockAsm8BAvx
  18684. two_bytes_match_emit_encodeSnappyBlockAsm8BAvx:
  18685. MOVB $0xf0, (AX)
  18686. MOVB DI, 1(AX)
  18687. ADDQ $0x02, AX
  18688. JMP memmove_match_emit_encodeSnappyBlockAsm8BAvx
  18689. one_byte_match_emit_encodeSnappyBlockAsm8BAvx:
  18690. SHLB $0x02, DI
  18691. MOVB DI, (AX)
  18692. ADDQ $0x01, AX
  18693. memmove_match_emit_encodeSnappyBlockAsm8BAvx:
  18694. LEAQ (AX)(R8*1), DI
  18695. NOP
  18696. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_tail:
  18697. TESTQ R8, R8
  18698. JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18699. CMPQ R8, $0x02
  18700. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_1or2
  18701. CMPQ R8, $0x04
  18702. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_3
  18703. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_4
  18704. CMPQ R8, $0x08
  18705. JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_5through7
  18706. JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_8
  18707. CMPQ R8, $0x10
  18708. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_9through16
  18709. CMPQ R8, $0x20
  18710. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_17through32
  18711. CMPQ R8, $0x40
  18712. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_33through64
  18713. CMPQ R8, $0x80
  18714. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_65through128
  18715. CMPQ R8, $0x00000100
  18716. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_129through256
  18717. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned
  18718. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_1or2:
  18719. MOVB (SI), R9
  18720. MOVB -1(SI)(R8*1), R10
  18721. MOVB R9, (AX)
  18722. MOVB R10, -1(AX)(R8*1)
  18723. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18724. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_4:
  18725. MOVL (SI), R9
  18726. MOVL R9, (AX)
  18727. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18728. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_3:
  18729. MOVW (SI), R9
  18730. MOVB 2(SI), R10
  18731. MOVW R9, (AX)
  18732. MOVB R10, 2(AX)
  18733. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18734. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_5through7:
  18735. MOVL (SI), R9
  18736. MOVL -4(SI)(R8*1), R10
  18737. MOVL R9, (AX)
  18738. MOVL R10, -4(AX)(R8*1)
  18739. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18740. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_8:
  18741. MOVQ (SI), R9
  18742. MOVQ R9, (AX)
  18743. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18744. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_9through16:
  18745. MOVQ (SI), R9
  18746. MOVQ -8(SI)(R8*1), R10
  18747. MOVQ R9, (AX)
  18748. MOVQ R10, -8(AX)(R8*1)
  18749. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18750. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_17through32:
  18751. MOVOU (SI), X0
  18752. MOVOU -16(SI)(R8*1), X1
  18753. MOVOU X0, (AX)
  18754. MOVOU X1, -16(AX)(R8*1)
  18755. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18756. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_33through64:
  18757. MOVOU (SI), X0
  18758. MOVOU 16(SI), X1
  18759. MOVOU -32(SI)(R8*1), X2
  18760. MOVOU -16(SI)(R8*1), X3
  18761. MOVOU X0, (AX)
  18762. MOVOU X1, 16(AX)
  18763. MOVOU X2, -32(AX)(R8*1)
  18764. MOVOU X3, -16(AX)(R8*1)
  18765. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18766. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_65through128:
  18767. MOVOU (SI), X0
  18768. MOVOU 16(SI), X1
  18769. MOVOU 32(SI), X2
  18770. MOVOU 48(SI), X3
  18771. MOVOU -64(SI)(R8*1), X12
  18772. MOVOU -48(SI)(R8*1), X13
  18773. MOVOU -32(SI)(R8*1), X14
  18774. MOVOU -16(SI)(R8*1), X15
  18775. MOVOU X0, (AX)
  18776. MOVOU X1, 16(AX)
  18777. MOVOU X2, 32(AX)
  18778. MOVOU X3, 48(AX)
  18779. MOVOU X12, -64(AX)(R8*1)
  18780. MOVOU X13, -48(AX)(R8*1)
  18781. MOVOU X14, -32(AX)(R8*1)
  18782. MOVOU X15, -16(AX)(R8*1)
  18783. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18784. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_129through256:
  18785. MOVOU (SI), X0
  18786. MOVOU 16(SI), X1
  18787. MOVOU 32(SI), X2
  18788. MOVOU 48(SI), X3
  18789. MOVOU 64(SI), X4
  18790. MOVOU 80(SI), X5
  18791. MOVOU 96(SI), X6
  18792. MOVOU 112(SI), X7
  18793. MOVOU -128(SI)(R8*1), X8
  18794. MOVOU -112(SI)(R8*1), X9
  18795. MOVOU -96(SI)(R8*1), X10
  18796. MOVOU -80(SI)(R8*1), X11
  18797. MOVOU -64(SI)(R8*1), X12
  18798. MOVOU -48(SI)(R8*1), X13
  18799. MOVOU -32(SI)(R8*1), X14
  18800. MOVOU -16(SI)(R8*1), X15
  18801. MOVOU X0, (AX)
  18802. MOVOU X1, 16(AX)
  18803. MOVOU X2, 32(AX)
  18804. MOVOU X3, 48(AX)
  18805. MOVOU X4, 64(AX)
  18806. MOVOU X5, 80(AX)
  18807. MOVOU X6, 96(AX)
  18808. MOVOU X7, 112(AX)
  18809. MOVOU X8, -128(AX)(R8*1)
  18810. MOVOU X9, -112(AX)(R8*1)
  18811. MOVOU X10, -96(AX)(R8*1)
  18812. MOVOU X11, -80(AX)(R8*1)
  18813. MOVOU X12, -64(AX)(R8*1)
  18814. MOVOU X13, -48(AX)(R8*1)
  18815. MOVOU X14, -32(AX)(R8*1)
  18816. MOVOU X15, -16(AX)(R8*1)
  18817. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx
  18818. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048:
  18819. LEAQ -256(R8), R8
  18820. MOVOU (SI), X0
  18821. MOVOU 16(SI), X1
  18822. MOVOU 32(SI), X2
  18823. MOVOU 48(SI), X3
  18824. MOVOU 64(SI), X4
  18825. MOVOU 80(SI), X5
  18826. MOVOU 96(SI), X6
  18827. MOVOU 112(SI), X7
  18828. MOVOU 128(SI), X8
  18829. MOVOU 144(SI), X9
  18830. MOVOU 160(SI), X10
  18831. MOVOU 176(SI), X11
  18832. MOVOU 192(SI), X12
  18833. MOVOU 208(SI), X13
  18834. MOVOU 224(SI), X14
  18835. MOVOU 240(SI), X15
  18836. MOVOU X0, (AX)
  18837. MOVOU X1, 16(AX)
  18838. MOVOU X2, 32(AX)
  18839. MOVOU X3, 48(AX)
  18840. MOVOU X4, 64(AX)
  18841. MOVOU X5, 80(AX)
  18842. MOVOU X6, 96(AX)
  18843. MOVOU X7, 112(AX)
  18844. MOVOU X8, 128(AX)
  18845. MOVOU X9, 144(AX)
  18846. MOVOU X10, 160(AX)
  18847. MOVOU X11, 176(AX)
  18848. MOVOU X12, 192(AX)
  18849. MOVOU X13, 208(AX)
  18850. MOVOU X14, 224(AX)
  18851. MOVOU X15, 240(AX)
  18852. CMPQ R8, $0x00000100
  18853. LEAQ 256(SI), SI
  18854. LEAQ 256(AX), AX
  18855. JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048
  18856. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_tail
  18857. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned:
  18858. LEAQ (SI)(R8*1), R10
  18859. MOVQ AX, R12
  18860. MOVOU -128(R10), X5
  18861. MOVOU -112(R10), X6
  18862. MOVQ $0x00000080, R9
  18863. ANDQ $0xffffffe0, AX
  18864. ADDQ $0x20, AX
  18865. MOVOU -96(R10), X7
  18866. MOVOU -80(R10), X8
  18867. MOVQ AX, R11
  18868. SUBQ R12, R11
  18869. MOVOU -64(R10), X9
  18870. MOVOU -48(R10), X10
  18871. SUBQ R11, R8
  18872. MOVOU -32(R10), X11
  18873. MOVOU -16(R10), X12
  18874. VMOVDQU (SI), Y4
  18875. ADDQ R11, SI
  18876. SUBQ R9, R8
  18877. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop:
  18878. VMOVDQU (SI), Y0
  18879. VMOVDQU 32(SI), Y1
  18880. VMOVDQU 64(SI), Y2
  18881. VMOVDQU 96(SI), Y3
  18882. ADDQ R9, SI
  18883. VMOVDQA Y0, (AX)
  18884. VMOVDQA Y1, 32(AX)
  18885. VMOVDQA Y2, 64(AX)
  18886. VMOVDQA Y3, 96(AX)
  18887. ADDQ R9, AX
  18888. SUBQ R9, R8
  18889. JA emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop
  18890. ADDQ R9, R8
  18891. ADDQ AX, R8
  18892. VMOVDQU Y4, (R12)
  18893. VZEROUPPER
  18894. MOVOU X5, -128(R8)
  18895. MOVOU X6, -112(R8)
  18896. MOVOU X7, -96(R8)
  18897. MOVOU X8, -80(R8)
  18898. MOVOU X9, -64(R8)
  18899. MOVOU X10, -48(R8)
  18900. MOVOU X11, -32(R8)
  18901. MOVOU X12, -16(R8)
  18902. memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx:
  18903. MOVQ DI, AX
  18904. emit_literal_done_match_emit_encodeSnappyBlockAsm8BAvx:
  18905. match_nolit_loop_encodeSnappyBlockAsm8BAvx:
  18906. MOVL CX, SI
  18907. SUBL BP, SI
  18908. MOVL SI, 16(SP)
  18909. ADDL $0x04, CX
  18910. ADDL $0x04, BP
  18911. MOVQ src_len+32(FP), SI
  18912. SUBL CX, SI
  18913. LEAQ (DX)(CX*1), DI
  18914. LEAQ (DX)(BP*1), BP
  18915. XORL R9, R9
  18916. CMPL SI, $0x08
  18917. JL matchlen_single_match_nolit_encodeSnappyBlockAsm8BAvx
  18918. matchlen_loopback_match_nolit_encodeSnappyBlockAsm8BAvx:
  18919. MOVQ (DI)(R9*1), R8
  18920. XORQ (BP)(R9*1), R8
  18921. TESTQ R8, R8
  18922. JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8BAvx
  18923. BSFQ R8, R8
  18924. SARQ $0x03, R8
  18925. LEAL (R9)(R8*1), R9
  18926. JMP match_nolit_end_encodeSnappyBlockAsm8BAvx
  18927. matchlen_loop_match_nolit_encodeSnappyBlockAsm8BAvx:
  18928. LEAL -8(SI), SI
  18929. LEAL 8(R9), R9
  18930. CMPL SI, $0x08
  18931. JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8BAvx
  18932. matchlen_single_match_nolit_encodeSnappyBlockAsm8BAvx:
  18933. TESTL SI, SI
  18934. JZ match_nolit_end_encodeSnappyBlockAsm8BAvx
  18935. matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8BAvx:
  18936. MOVB (DI)(R9*1), R8
  18937. CMPB (BP)(R9*1), R8
  18938. JNE match_nolit_end_encodeSnappyBlockAsm8BAvx
  18939. LEAL 1(R9), R9
  18940. DECL SI
  18941. JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8BAvx
  18942. match_nolit_end_encodeSnappyBlockAsm8BAvx:
  18943. ADDL R9, CX
  18944. MOVL 16(SP), BP
  18945. ADDL $0x04, R9
  18946. CMPL BP, $0x00010000
  18947. JL two_byte_offset_match_nolit_encodeSnappyBlockAsm8BAvx
  18948. four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8BAvx:
  18949. CMPL R9, $0x40
  18950. JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm8BAvx
  18951. MOVB $0xff, (AX)
  18952. MOVL BP, 1(AX)
  18953. LEAL -64(R9), R9
  18954. ADDQ $0x05, AX
  18955. CMPL R9, $0x04
  18956. JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm8BAvx
  18957. JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8BAvx
  18958. four_bytes_remain_match_nolit_encodeSnappyBlockAsm8BAvx:
  18959. TESTL R9, R9
  18960. JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm8BAvx
  18961. MOVB $0x03, BL
  18962. LEAL -4(BX)(R9*4), R9
  18963. MOVB R9, (AX)
  18964. MOVL BP, 1(AX)
  18965. ADDQ $0x05, AX
  18966. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8BAvx
  18967. two_byte_offset_match_nolit_encodeSnappyBlockAsm8BAvx:
  18968. CMPL R9, $0x40
  18969. JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8BAvx
  18970. MOVB $0xee, (AX)
  18971. MOVW BP, 1(AX)
  18972. LEAL -60(R9), R9
  18973. ADDQ $0x03, AX
  18974. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8BAvx
  18975. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8BAvx:
  18976. CMPL R9, $0x0c
  18977. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8BAvx
  18978. CMPL BP, $0x00000800
  18979. JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8BAvx
  18980. MOVB $0x01, BL
  18981. LEAL -16(BX)(R9*4), R9
  18982. MOVB BP, 1(AX)
  18983. SHRL $0x08, BP
  18984. SHLL $0x05, BP
  18985. ORL BP, R9
  18986. MOVB R9, (AX)
  18987. ADDQ $0x02, AX
  18988. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8BAvx
  18989. emit_copy_three_match_nolit_encodeSnappyBlockAsm8BAvx:
  18990. MOVB $0x02, BL
  18991. LEAL -4(BX)(R9*4), R9
  18992. MOVB R9, (AX)
  18993. MOVW BP, 1(AX)
  18994. ADDQ $0x03, AX
  18995. match_nolit_emitcopy_end_encodeSnappyBlockAsm8BAvx:
  18996. MOVL CX, 12(SP)
  18997. CMPL CX, 8(SP)
  18998. JGE emit_remainder_encodeSnappyBlockAsm8BAvx
  18999. CMPQ AX, (SP)
  19000. JL match_nolit_dst_ok_encodeSnappyBlockAsm8BAvx
  19001. MOVQ $0x00000000, ret+48(FP)
  19002. RET
  19003. match_nolit_dst_ok_encodeSnappyBlockAsm8BAvx:
  19004. MOVQ -2(DX)(CX*1), SI
  19005. MOVQ $0x9e3779b1, BP
  19006. MOVQ SI, DI
  19007. SHRQ $0x10, SI
  19008. MOVQ SI, R8
  19009. SHLQ $0x20, DI
  19010. IMULQ BP, DI
  19011. SHRQ $0x38, DI
  19012. SHLQ $0x20, R8
  19013. IMULQ BP, R8
  19014. SHRQ $0x38, R8
  19015. LEAL -2(CX), R9
  19016. MOVL 24(SP)(R8*4), BP
  19017. MOVL R9, 24(SP)(DI*4)
  19018. MOVL CX, 24(SP)(R8*4)
  19019. CMPL (DX)(BP*1), SI
  19020. JEQ match_nolit_loop_encodeSnappyBlockAsm8BAvx
  19021. INCL CX
  19022. JMP search_loop_encodeSnappyBlockAsm8BAvx
  19023. emit_remainder_encodeSnappyBlockAsm8BAvx:
  19024. MOVQ src_len+32(FP), CX
  19025. SUBL 12(SP), CX
  19026. LEAQ 4(AX)(CX*1), CX
  19027. CMPQ CX, (SP)
  19028. JL emit_remainder_ok_encodeSnappyBlockAsm8BAvx
  19029. MOVQ $0x00000000, ret+48(FP)
  19030. RET
  19031. emit_remainder_ok_encodeSnappyBlockAsm8BAvx:
  19032. MOVQ src_len+32(FP), CX
  19033. MOVL 12(SP), BX
  19034. CMPL BX, CX
  19035. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8BAvx
  19036. MOVL CX, BP
  19037. MOVL CX, 12(SP)
  19038. LEAQ (DX)(BX*1), CX
  19039. SUBL BX, BP
  19040. MOVL BP, DX
  19041. SUBL $0x01, DX
  19042. JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm8BAvx
  19043. CMPL DX, $0x3c
  19044. JLT one_byte_emit_remainder_encodeSnappyBlockAsm8BAvx
  19045. CMPL DX, $0x00000100
  19046. JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx
  19047. CMPL DX, $0x00010000
  19048. JLT three_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx
  19049. CMPL DX, $0x01000000
  19050. JLT four_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx
  19051. MOVB $0xfc, (AX)
  19052. MOVL DX, 1(AX)
  19053. ADDQ $0x05, AX
  19054. JMP memmove_emit_remainder_encodeSnappyBlockAsm8BAvx
  19055. four_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx:
  19056. MOVL DX, BX
  19057. SHRL $0x10, BX
  19058. MOVB $0xf8, (AX)
  19059. MOVW DX, 1(AX)
  19060. MOVB BL, 3(AX)
  19061. ADDQ $0x04, AX
  19062. JMP memmove_emit_remainder_encodeSnappyBlockAsm8BAvx
  19063. three_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx:
  19064. MOVB $0xf4, (AX)
  19065. MOVW DX, 1(AX)
  19066. ADDQ $0x03, AX
  19067. JMP memmove_emit_remainder_encodeSnappyBlockAsm8BAvx
  19068. two_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx:
  19069. MOVB $0xf0, (AX)
  19070. MOVB DL, 1(AX)
  19071. ADDQ $0x02, AX
  19072. JMP memmove_emit_remainder_encodeSnappyBlockAsm8BAvx
  19073. one_byte_emit_remainder_encodeSnappyBlockAsm8BAvx:
  19074. SHLB $0x02, DL
  19075. MOVB DL, (AX)
  19076. ADDQ $0x01, AX
  19077. memmove_emit_remainder_encodeSnappyBlockAsm8BAvx:
  19078. LEAQ (AX)(BP*1), DX
  19079. MOVL BP, BX
  19080. NOP
  19081. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_tail:
  19082. TESTQ BX, BX
  19083. JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19084. CMPQ BX, $0x02
  19085. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_1or2
  19086. CMPQ BX, $0x04
  19087. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_3
  19088. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_4
  19089. CMPQ BX, $0x08
  19090. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_5through7
  19091. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_8
  19092. CMPQ BX, $0x10
  19093. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_9through16
  19094. CMPQ BX, $0x20
  19095. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_17through32
  19096. CMPQ BX, $0x40
  19097. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_33through64
  19098. CMPQ BX, $0x80
  19099. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_65through128
  19100. CMPQ BX, $0x00000100
  19101. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_129through256
  19102. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned
  19103. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_1or2:
  19104. MOVB (CX), BP
  19105. MOVB -1(CX)(BX*1), SI
  19106. MOVB BP, (AX)
  19107. MOVB SI, -1(AX)(BX*1)
  19108. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19109. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_4:
  19110. MOVL (CX), BP
  19111. MOVL BP, (AX)
  19112. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19113. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_3:
  19114. MOVW (CX), BP
  19115. MOVB 2(CX), SI
  19116. MOVW BP, (AX)
  19117. MOVB SI, 2(AX)
  19118. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19119. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_5through7:
  19120. MOVL (CX), BP
  19121. MOVL -4(CX)(BX*1), SI
  19122. MOVL BP, (AX)
  19123. MOVL SI, -4(AX)(BX*1)
  19124. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19125. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_8:
  19126. MOVQ (CX), BP
  19127. MOVQ BP, (AX)
  19128. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19129. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_9through16:
  19130. MOVQ (CX), BP
  19131. MOVQ -8(CX)(BX*1), SI
  19132. MOVQ BP, (AX)
  19133. MOVQ SI, -8(AX)(BX*1)
  19134. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19135. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_17through32:
  19136. MOVOU (CX), X0
  19137. MOVOU -16(CX)(BX*1), X1
  19138. MOVOU X0, (AX)
  19139. MOVOU X1, -16(AX)(BX*1)
  19140. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19141. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_33through64:
  19142. MOVOU (CX), X0
  19143. MOVOU 16(CX), X1
  19144. MOVOU -32(CX)(BX*1), X2
  19145. MOVOU -16(CX)(BX*1), X3
  19146. MOVOU X0, (AX)
  19147. MOVOU X1, 16(AX)
  19148. MOVOU X2, -32(AX)(BX*1)
  19149. MOVOU X3, -16(AX)(BX*1)
  19150. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19151. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_65through128:
  19152. MOVOU (CX), X0
  19153. MOVOU 16(CX), X1
  19154. MOVOU 32(CX), X2
  19155. MOVOU 48(CX), X3
  19156. MOVOU -64(CX)(BX*1), X12
  19157. MOVOU -48(CX)(BX*1), X13
  19158. MOVOU -32(CX)(BX*1), X14
  19159. MOVOU -16(CX)(BX*1), X15
  19160. MOVOU X0, (AX)
  19161. MOVOU X1, 16(AX)
  19162. MOVOU X2, 32(AX)
  19163. MOVOU X3, 48(AX)
  19164. MOVOU X12, -64(AX)(BX*1)
  19165. MOVOU X13, -48(AX)(BX*1)
  19166. MOVOU X14, -32(AX)(BX*1)
  19167. MOVOU X15, -16(AX)(BX*1)
  19168. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19169. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_129through256:
  19170. MOVOU (CX), X0
  19171. MOVOU 16(CX), X1
  19172. MOVOU 32(CX), X2
  19173. MOVOU 48(CX), X3
  19174. MOVOU 64(CX), X4
  19175. MOVOU 80(CX), X5
  19176. MOVOU 96(CX), X6
  19177. MOVOU 112(CX), X7
  19178. MOVOU -128(CX)(BX*1), X8
  19179. MOVOU -112(CX)(BX*1), X9
  19180. MOVOU -96(CX)(BX*1), X10
  19181. MOVOU -80(CX)(BX*1), X11
  19182. MOVOU -64(CX)(BX*1), X12
  19183. MOVOU -48(CX)(BX*1), X13
  19184. MOVOU -32(CX)(BX*1), X14
  19185. MOVOU -16(CX)(BX*1), X15
  19186. MOVOU X0, (AX)
  19187. MOVOU X1, 16(AX)
  19188. MOVOU X2, 32(AX)
  19189. MOVOU X3, 48(AX)
  19190. MOVOU X4, 64(AX)
  19191. MOVOU X5, 80(AX)
  19192. MOVOU X6, 96(AX)
  19193. MOVOU X7, 112(AX)
  19194. MOVOU X8, -128(AX)(BX*1)
  19195. MOVOU X9, -112(AX)(BX*1)
  19196. MOVOU X10, -96(AX)(BX*1)
  19197. MOVOU X11, -80(AX)(BX*1)
  19198. MOVOU X12, -64(AX)(BX*1)
  19199. MOVOU X13, -48(AX)(BX*1)
  19200. MOVOU X14, -32(AX)(BX*1)
  19201. MOVOU X15, -16(AX)(BX*1)
  19202. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx
  19203. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048:
  19204. LEAQ -256(BX), BX
  19205. MOVOU (CX), X0
  19206. MOVOU 16(CX), X1
  19207. MOVOU 32(CX), X2
  19208. MOVOU 48(CX), X3
  19209. MOVOU 64(CX), X4
  19210. MOVOU 80(CX), X5
  19211. MOVOU 96(CX), X6
  19212. MOVOU 112(CX), X7
  19213. MOVOU 128(CX), X8
  19214. MOVOU 144(CX), X9
  19215. MOVOU 160(CX), X10
  19216. MOVOU 176(CX), X11
  19217. MOVOU 192(CX), X12
  19218. MOVOU 208(CX), X13
  19219. MOVOU 224(CX), X14
  19220. MOVOU 240(CX), X15
  19221. MOVOU X0, (AX)
  19222. MOVOU X1, 16(AX)
  19223. MOVOU X2, 32(AX)
  19224. MOVOU X3, 48(AX)
  19225. MOVOU X4, 64(AX)
  19226. MOVOU X5, 80(AX)
  19227. MOVOU X6, 96(AX)
  19228. MOVOU X7, 112(AX)
  19229. MOVOU X8, 128(AX)
  19230. MOVOU X9, 144(AX)
  19231. MOVOU X10, 160(AX)
  19232. MOVOU X11, 176(AX)
  19233. MOVOU X12, 192(AX)
  19234. MOVOU X13, 208(AX)
  19235. MOVOU X14, 224(AX)
  19236. MOVOU X15, 240(AX)
  19237. CMPQ BX, $0x00000100
  19238. LEAQ 256(CX), CX
  19239. LEAQ 256(AX), AX
  19240. JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048
  19241. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_tail
  19242. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned:
  19243. LEAQ (CX)(BX*1), SI
  19244. MOVQ AX, R8
  19245. MOVOU -128(SI), X5
  19246. MOVOU -112(SI), X6
  19247. MOVQ $0x00000080, BP
  19248. ANDQ $0xffffffe0, AX
  19249. ADDQ $0x20, AX
  19250. MOVOU -96(SI), X7
  19251. MOVOU -80(SI), X8
  19252. MOVQ AX, DI
  19253. SUBQ R8, DI
  19254. MOVOU -64(SI), X9
  19255. MOVOU -48(SI), X10
  19256. SUBQ DI, BX
  19257. MOVOU -32(SI), X11
  19258. MOVOU -16(SI), X12
  19259. VMOVDQU (CX), Y4
  19260. ADDQ DI, CX
  19261. SUBQ BP, BX
  19262. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop:
  19263. VMOVDQU (CX), Y0
  19264. VMOVDQU 32(CX), Y1
  19265. VMOVDQU 64(CX), Y2
  19266. VMOVDQU 96(CX), Y3
  19267. ADDQ BP, CX
  19268. VMOVDQA Y0, (AX)
  19269. VMOVDQA Y1, 32(AX)
  19270. VMOVDQA Y2, 64(AX)
  19271. VMOVDQA Y3, 96(AX)
  19272. ADDQ BP, AX
  19273. SUBQ BP, BX
  19274. JA emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop
  19275. ADDQ BP, BX
  19276. ADDQ AX, BX
  19277. VMOVDQU Y4, (R8)
  19278. VZEROUPPER
  19279. MOVOU X5, -128(BX)
  19280. MOVOU X6, -112(BX)
  19281. MOVOU X7, -96(BX)
  19282. MOVOU X8, -80(BX)
  19283. MOVOU X9, -64(BX)
  19284. MOVOU X10, -48(BX)
  19285. MOVOU X11, -32(BX)
  19286. MOVOU X12, -16(BX)
  19287. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx:
  19288. MOVQ DX, AX
  19289. emit_literal_done_emit_remainder_encodeSnappyBlockAsm8BAvx:
  19290. MOVQ dst_base+0(FP), CX
  19291. SUBQ CX, AX
  19292. MOVQ AX, ret+48(FP)
  19293. RET
  // func emitLiteral(dst []byte, lit []byte) int
  // Requires: SSE2
  //
  // Writes a literal header describing len(lit) to the start of dst, copies
  // lit directly after it, and returns the number of bytes written
  // (header bytes + len(lit)). When the length is zero nothing is written
  // and 0 is returned.
  //
  // Header layout produced below (v = len(lit)-1):
  //   v <  60      1 byte : v<<2
  //   v <  1<<8    2 bytes: 0xf0, v
  //   v <  1<<16   3 bytes: 0xf4, v as 16-bit little endian
  //   v <  1<<24   4 bytes: 0xf8, v as 24-bit little endian
  //   otherwise    5 bytes: 0xfc, v as 32-bit little endian
  //
  // The length is processed as a 32-bit value for the header and the return
  // value. dst_len is never read: the caller must guarantee that dst has
  // room for the header plus the literal bytes.
  //
  // Register use:
  //   AX  write cursor into dst
  //   CX  read cursor into lit (reused as scratch by the last small copies)
  //   DX  number of literal bytes still to copy
  //   BX  return value
  //   DI  len(lit)-1 while the header is built, then copy scratch
  //   SI  scratch for the 4-byte header
  //   X0-X15 copy buffers
  //
  // BP is intentionally left untouched. The function is declared with a $0
  // frame, so the assembler emits no BP save/restore; using BP as scratch
  // would hand a corrupted frame pointer back to the caller.
  TEXT ·emitLiteral(SB), NOSPLIT, $0-56
      MOVQ dst_base+0(FP), AX
      MOVQ lit_base+24(FP), CX
      MOVQ lit_len+32(FP), DX
      MOVL DX, BX
      MOVL DX, DI

      // DI = len-1. A borrow means the length was zero: return 0 (BX).
      SUBL $0x01, DI
      JC   emit_literal_end_standalone

      // Pick the header size. The comparisons are unsigned so that values
      // with bit 31 set select the 5-byte form instead of looking negative.
      CMPL DI, $0x3c
      JB   one_byte_standalone
      CMPL DI, $0x00000100
      JB   two_bytes_standalone
      CMPL DI, $0x00010000
      JB   three_bytes_standalone
      CMPL DI, $0x01000000
      JB   four_bytes_standalone

      // 5-byte header: tag 0xfc followed by 32-bit length-1.
      MOVB $0xfc, (AX)
      MOVL DI, 1(AX)
      ADDQ $0x05, BX
      ADDQ $0x05, AX
      JMP  memmove_standalone

  four_bytes_standalone:
      // 4-byte header: tag 0xf8 followed by 24-bit length-1.
      MOVL DI, SI
      SHRL $0x10, SI
      MOVB $0xf8, (AX)
      MOVW DI, 1(AX)
      MOVB SI, 3(AX)
      ADDQ $0x04, BX
      ADDQ $0x04, AX
      JMP  memmove_standalone

  three_bytes_standalone:
      // 3-byte header: tag 0xf4 followed by 16-bit length-1.
      MOVB $0xf4, (AX)
      MOVW DI, 1(AX)
      ADDQ $0x03, BX
      ADDQ $0x03, AX
      JMP  memmove_standalone

  two_bytes_standalone:
      // 2-byte header: tag 0xf0 followed by 8-bit length-1.
      MOVB $0xf0, (AX)
      MOVB DI, 1(AX)
      ADDQ $0x02, BX
      ADDQ $0x02, AX
      JMP  memmove_standalone

  one_byte_standalone:
      // 1-byte header: (length-1)<<2.
      SHLB $0x02, DI
      MOVB DI, (AX)
      ADDQ $0x01, BX
      ADDQ $0x01, AX

  memmove_standalone:
      NOP

      // Copy DX bytes from (CX) to (AX). Each size class loads everything it
      // needs before storing; ranges are covered by a head and a tail access
      // that may overlap in the middle.
  emit_lit_memmove_standalone_memmove_tail:
      TESTQ DX, DX
      JEQ   emit_literal_end_standalone
      CMPQ  DX, $0x02
      JBE   emit_lit_memmove_standalone_memmove_move_1or2
      CMPQ  DX, $0x04
      JB    emit_lit_memmove_standalone_memmove_move_3
      JBE   emit_lit_memmove_standalone_memmove_move_4
      CMPQ  DX, $0x08
      JB    emit_lit_memmove_standalone_memmove_move_5through7
      JE    emit_lit_memmove_standalone_memmove_move_8
      CMPQ  DX, $0x10
      JBE   emit_lit_memmove_standalone_memmove_move_9through16
      CMPQ  DX, $0x20
      JBE   emit_lit_memmove_standalone_memmove_move_17through32
      CMPQ  DX, $0x40
      JBE   emit_lit_memmove_standalone_memmove_move_33through64
      CMPQ  DX, $0x80
      JBE   emit_lit_memmove_standalone_memmove_move_65through128
      CMPQ  DX, $0x00000100
      JBE   emit_lit_memmove_standalone_memmove_move_129through256
      JMP   emit_lit_memmove_standalone_memmove_move_256through2048

  emit_lit_memmove_standalone_memmove_move_1or2:
      // CL is overwritten only after CX has been used for both addresses.
      MOVB (CX), DI
      MOVB -1(CX)(DX*1), CL
      MOVB DI, (AX)
      MOVB CL, -1(AX)(DX*1)
      JMP  emit_literal_end_standalone

  emit_lit_memmove_standalone_memmove_move_4:
      MOVL (CX), DI
      MOVL DI, (AX)
      JMP  emit_literal_end_standalone

  emit_lit_memmove_standalone_memmove_move_3:
      MOVW (CX), DI
      MOVB 2(CX), CL
      MOVW DI, (AX)
      MOVB CL, 2(AX)
      JMP  emit_literal_end_standalone

  emit_lit_memmove_standalone_memmove_move_5through7:
      MOVL (CX), DI
      MOVL -4(CX)(DX*1), CX
      MOVL DI, (AX)
      MOVL CX, -4(AX)(DX*1)
      JMP  emit_literal_end_standalone

  emit_lit_memmove_standalone_memmove_move_8:
      MOVQ (CX), DI
      MOVQ DI, (AX)
      JMP  emit_literal_end_standalone

  emit_lit_memmove_standalone_memmove_move_9through16:
      MOVQ (CX), DI
      MOVQ -8(CX)(DX*1), CX
      MOVQ DI, (AX)
      MOVQ CX, -8(AX)(DX*1)
      JMP  emit_literal_end_standalone

  emit_lit_memmove_standalone_memmove_move_17through32:
      MOVOU (CX), X0
      MOVOU -16(CX)(DX*1), X1
      MOVOU X0, (AX)
      MOVOU X1, -16(AX)(DX*1)
      JMP   emit_literal_end_standalone

  emit_lit_memmove_standalone_memmove_move_33through64:
      MOVOU (CX), X0
      MOVOU 16(CX), X1
      MOVOU -32(CX)(DX*1), X2
      MOVOU -16(CX)(DX*1), X3
      MOVOU X0, (AX)
      MOVOU X1, 16(AX)
      MOVOU X2, -32(AX)(DX*1)
      MOVOU X3, -16(AX)(DX*1)
      JMP   emit_literal_end_standalone

  emit_lit_memmove_standalone_memmove_move_65through128:
      MOVOU (CX), X0
      MOVOU 16(CX), X1
      MOVOU 32(CX), X2
      MOVOU 48(CX), X3
      MOVOU -64(CX)(DX*1), X12
      MOVOU -48(CX)(DX*1), X13
      MOVOU -32(CX)(DX*1), X14
      MOVOU -16(CX)(DX*1), X15
      MOVOU X0, (AX)
      MOVOU X1, 16(AX)
      MOVOU X2, 32(AX)
      MOVOU X3, 48(AX)
      MOVOU X12, -64(AX)(DX*1)
      MOVOU X13, -48(AX)(DX*1)
      MOVOU X14, -32(AX)(DX*1)
      MOVOU X15, -16(AX)(DX*1)
      JMP   emit_literal_end_standalone

  emit_lit_memmove_standalone_memmove_move_129through256:
      MOVOU (CX), X0
      MOVOU 16(CX), X1
      MOVOU 32(CX), X2
      MOVOU 48(CX), X3
      MOVOU 64(CX), X4
      MOVOU 80(CX), X5
      MOVOU 96(CX), X6
      MOVOU 112(CX), X7
      MOVOU -128(CX)(DX*1), X8
      MOVOU -112(CX)(DX*1), X9
      MOVOU -96(CX)(DX*1), X10
      MOVOU -80(CX)(DX*1), X11
      MOVOU -64(CX)(DX*1), X12
      MOVOU -48(CX)(DX*1), X13
      MOVOU -32(CX)(DX*1), X14
      MOVOU -16(CX)(DX*1), X15
      MOVOU X0, (AX)
      MOVOU X1, 16(AX)
      MOVOU X2, 32(AX)
      MOVOU X3, 48(AX)
      MOVOU X4, 64(AX)
      MOVOU X5, 80(AX)
      MOVOU X6, 96(AX)
      MOVOU X7, 112(AX)
      MOVOU X8, -128(AX)(DX*1)
      MOVOU X9, -112(AX)(DX*1)
      MOVOU X10, -96(AX)(DX*1)
      MOVOU X11, -80(AX)(DX*1)
      MOVOU X12, -64(AX)(DX*1)
      MOVOU X13, -48(AX)(DX*1)
      MOVOU X14, -32(AX)(DX*1)
      MOVOU X15, -16(AX)(DX*1)
      JMP   emit_literal_end_standalone

  emit_lit_memmove_standalone_memmove_move_256through2048:
      // Bulk loop: move 256 bytes per iteration, then hand whatever is left
      // (fewer than 256 bytes) back to the size dispatch above.
      LEAQ  -256(DX), DX
      MOVOU (CX), X0
      MOVOU 16(CX), X1
      MOVOU 32(CX), X2
      MOVOU 48(CX), X3
      MOVOU 64(CX), X4
      MOVOU 80(CX), X5
      MOVOU 96(CX), X6
      MOVOU 112(CX), X7
      MOVOU 128(CX), X8
      MOVOU 144(CX), X9
      MOVOU 160(CX), X10
      MOVOU 176(CX), X11
      MOVOU 192(CX), X12
      MOVOU 208(CX), X13
      MOVOU 224(CX), X14
      MOVOU 240(CX), X15
      MOVOU X0, (AX)
      MOVOU X1, 16(AX)
      MOVOU X2, 32(AX)
      MOVOU X3, 48(AX)
      MOVOU X4, 64(AX)
      MOVOU X5, 80(AX)
      MOVOU X6, 96(AX)
      MOVOU X7, 112(AX)
      MOVOU X8, 128(AX)
      MOVOU X9, 144(AX)
      MOVOU X10, 160(AX)
      MOVOU X11, 176(AX)
      MOVOU X12, 192(AX)
      MOVOU X13, 208(AX)
      MOVOU X14, 224(AX)
      MOVOU X15, 240(AX)

      // The LEAQs advance both cursors without disturbing the flags set by
      // CMPQ, so the branch still tests the remaining byte count.
      CMPQ  DX, $0x00000100
      LEAQ  256(CX), CX
      LEAQ  256(AX), AX
      JGE   emit_lit_memmove_standalone_memmove_move_256through2048
      JMP   emit_lit_memmove_standalone_memmove_tail

  emit_literal_end_standalone:
      MOVQ BX, ret+48(FP)
      RET
  19509. // func emitLiteralAvx(dst []byte, lit []byte) int
  19510. // Requires: AVX, SSE2
  19511. TEXT ·emitLiteralAvx(SB), NOSPLIT, $0-56
  19512. MOVQ dst_base+0(FP), AX
  19513. MOVQ lit_base+24(FP), CX
  19514. MOVQ lit_len+32(FP), DX
  19515. MOVL DX, BX
  19516. MOVL DX, BP
  19517. SUBL $0x01, BP
  19518. JC emit_literal_end_avx_standalone
  19519. CMPL BP, $0x3c
  19520. JLT one_byte_standalone
  19521. CMPL BP, $0x00000100
  19522. JLT two_bytes_standalone
  19523. CMPL BP, $0x00010000
  19524. JLT three_bytes_standalone
  19525. CMPL BP, $0x01000000
  19526. JLT four_bytes_standalone
  19527. MOVB $0xfc, (AX)
  19528. MOVL BP, 1(AX)
  19529. ADDQ $0x05, BX
  19530. ADDQ $0x05, AX
  19531. JMP memmove_standalone
  19532. four_bytes_standalone:
  19533. MOVL BP, SI
  19534. SHRL $0x10, SI
  19535. MOVB $0xf8, (AX)
  19536. MOVW BP, 1(AX)
  19537. MOVB SI, 3(AX)
  19538. ADDQ $0x04, BX
  19539. ADDQ $0x04, AX
  19540. JMP memmove_standalone
  19541. three_bytes_standalone:
  19542. MOVB $0xf4, (AX)
  19543. MOVW BP, 1(AX)
  19544. ADDQ $0x03, BX
  19545. ADDQ $0x03, AX
  19546. JMP memmove_standalone
  19547. two_bytes_standalone:
  19548. MOVB $0xf0, (AX)
  19549. MOVB BP, 1(AX)
  19550. ADDQ $0x02, BX
  19551. ADDQ $0x02, AX
  19552. JMP memmove_standalone
  19553. one_byte_standalone:
  19554. SHLB $0x02, BP
  19555. MOVB BP, (AX)
  19556. ADDQ $0x01, BX
  19557. ADDQ $0x01, AX
  19558. memmove_standalone:
  19559. NOP
  19560. emit_lit_memmove_standalone_memmove_tail:
  19561. TESTQ DX, DX
  19562. JEQ emit_literal_end_avx_standalone
  19563. CMPQ DX, $0x02
  19564. JBE emit_lit_memmove_standalone_memmove_move_1or2
  19565. CMPQ DX, $0x04
  19566. JB emit_lit_memmove_standalone_memmove_move_3
  19567. JBE emit_lit_memmove_standalone_memmove_move_4
  19568. CMPQ DX, $0x08
  19569. JB emit_lit_memmove_standalone_memmove_move_5through7
  19570. JE emit_lit_memmove_standalone_memmove_move_8
  19571. CMPQ DX, $0x10
  19572. JBE emit_lit_memmove_standalone_memmove_move_9through16
  19573. CMPQ DX, $0x20
  19574. JBE emit_lit_memmove_standalone_memmove_move_17through32
  19575. CMPQ DX, $0x40
  19576. JBE emit_lit_memmove_standalone_memmove_move_33through64
  19577. CMPQ DX, $0x80
  19578. JBE emit_lit_memmove_standalone_memmove_move_65through128
  19579. CMPQ DX, $0x00000100
  19580. JBE emit_lit_memmove_standalone_memmove_move_129through256
  19581. JMP emit_lit_memmove_standalone_memmove_avxUnaligned
  19582. emit_lit_memmove_standalone_memmove_move_1or2:
  19583. MOVB (CX), BP
  19584. MOVB -1(CX)(DX*1), SI
  19585. MOVB BP, (AX)
  19586. MOVB SI, -1(AX)(DX*1)
  19587. JMP emit_literal_end_avx_standalone
  19588. emit_lit_memmove_standalone_memmove_move_4:
  19589. MOVL (CX), BP
  19590. MOVL BP, (AX)
  19591. JMP emit_literal_end_avx_standalone
  19592. emit_lit_memmove_standalone_memmove_move_3:
  19593. MOVW (CX), BP
  19594. MOVB 2(CX), SI
  19595. MOVW BP, (AX)
  19596. MOVB SI, 2(AX)
  19597. JMP emit_literal_end_avx_standalone
  19598. emit_lit_memmove_standalone_memmove_move_5through7:
  19599. MOVL (CX), BP
  19600. MOVL -4(CX)(DX*1), SI
  19601. MOVL BP, (AX)
  19602. MOVL SI, -4(AX)(DX*1)
  19603. JMP emit_literal_end_avx_standalone
  19604. emit_lit_memmove_standalone_memmove_move_8:
  19605. MOVQ (CX), BP
  19606. MOVQ BP, (AX)
  19607. JMP emit_literal_end_avx_standalone
  19608. emit_lit_memmove_standalone_memmove_move_9through16:
  19609. MOVQ (CX), BP
  19610. MOVQ -8(CX)(DX*1), SI
  19611. MOVQ BP, (AX)
  19612. MOVQ SI, -8(AX)(DX*1)
  19613. JMP emit_literal_end_avx_standalone
  19614. emit_lit_memmove_standalone_memmove_move_17through32:
  19615. MOVOU (CX), X0
  19616. MOVOU -16(CX)(DX*1), X1
  19617. MOVOU X0, (AX)
  19618. MOVOU X1, -16(AX)(DX*1)
  19619. JMP emit_literal_end_avx_standalone
  19620. emit_lit_memmove_standalone_memmove_move_33through64:
  19621. MOVOU (CX), X0
  19622. MOVOU 16(CX), X1
  19623. MOVOU -32(CX)(DX*1), X2
  19624. MOVOU -16(CX)(DX*1), X3
  19625. MOVOU X0, (AX)
  19626. MOVOU X1, 16(AX)
  19627. MOVOU X2, -32(AX)(DX*1)
  19628. MOVOU X3, -16(AX)(DX*1)
  19629. JMP emit_literal_end_avx_standalone
  19630. emit_lit_memmove_standalone_memmove_move_65through128:
  19631. MOVOU (CX), X0
  19632. MOVOU 16(CX), X1
  19633. MOVOU 32(CX), X2
  19634. MOVOU 48(CX), X3
  19635. MOVOU -64(CX)(DX*1), X12
  19636. MOVOU -48(CX)(DX*1), X13
  19637. MOVOU -32(CX)(DX*1), X14
  19638. MOVOU -16(CX)(DX*1), X15
  19639. MOVOU X0, (AX)
  19640. MOVOU X1, 16(AX)
  19641. MOVOU X2, 32(AX)
  19642. MOVOU X3, 48(AX)
  19643. MOVOU X12, -64(AX)(DX*1)
  19644. MOVOU X13, -48(AX)(DX*1)
  19645. MOVOU X14, -32(AX)(DX*1)
  19646. MOVOU X15, -16(AX)(DX*1)
  19647. JMP emit_literal_end_avx_standalone
  19648. emit_lit_memmove_standalone_memmove_move_129through256:
  19649. MOVOU (CX), X0
  19650. MOVOU 16(CX), X1
  19651. MOVOU 32(CX), X2
  19652. MOVOU 48(CX), X3
  19653. MOVOU 64(CX), X4
  19654. MOVOU 80(CX), X5
  19655. MOVOU 96(CX), X6
  19656. MOVOU 112(CX), X7
  19657. MOVOU -128(CX)(DX*1), X8
  19658. MOVOU -112(CX)(DX*1), X9
  19659. MOVOU -96(CX)(DX*1), X10
  19660. MOVOU -80(CX)(DX*1), X11
  19661. MOVOU -64(CX)(DX*1), X12
  19662. MOVOU -48(CX)(DX*1), X13
  19663. MOVOU -32(CX)(DX*1), X14
  19664. MOVOU -16(CX)(DX*1), X15
  19665. MOVOU X0, (AX)
  19666. MOVOU X1, 16(AX)
  19667. MOVOU X2, 32(AX)
  19668. MOVOU X3, 48(AX)
  19669. MOVOU X4, 64(AX)
  19670. MOVOU X5, 80(AX)
  19671. MOVOU X6, 96(AX)
  19672. MOVOU X7, 112(AX)
  19673. MOVOU X8, -128(AX)(DX*1)
  19674. MOVOU X9, -112(AX)(DX*1)
  19675. MOVOU X10, -96(AX)(DX*1)
  19676. MOVOU X11, -80(AX)(DX*1)
  19677. MOVOU X12, -64(AX)(DX*1)
  19678. MOVOU X13, -48(AX)(DX*1)
  19679. MOVOU X14, -32(AX)(DX*1)
  19680. MOVOU X15, -16(AX)(DX*1)
  19681. JMP emit_literal_end_avx_standalone
  19682. emit_lit_memmove_standalone_memmove_move_256through2048:
  19683. LEAQ -256(DX), DX
  19684. MOVOU (CX), X0
  19685. MOVOU 16(CX), X1
  19686. MOVOU 32(CX), X2
  19687. MOVOU 48(CX), X3
  19688. MOVOU 64(CX), X4
  19689. MOVOU 80(CX), X5
  19690. MOVOU 96(CX), X6
  19691. MOVOU 112(CX), X7
  19692. MOVOU 128(CX), X8
  19693. MOVOU 144(CX), X9
  19694. MOVOU 160(CX), X10
  19695. MOVOU 176(CX), X11
  19696. MOVOU 192(CX), X12
  19697. MOVOU 208(CX), X13
  19698. MOVOU 224(CX), X14
  19699. MOVOU 240(CX), X15
  19700. MOVOU X0, (AX)
  19701. MOVOU X1, 16(AX)
  19702. MOVOU X2, 32(AX)
  19703. MOVOU X3, 48(AX)
  19704. MOVOU X4, 64(AX)
  19705. MOVOU X5, 80(AX)
  19706. MOVOU X6, 96(AX)
  19707. MOVOU X7, 112(AX)
  19708. MOVOU X8, 128(AX)
  19709. MOVOU X9, 144(AX)
  19710. MOVOU X10, 160(AX)
  19711. MOVOU X11, 176(AX)
  19712. MOVOU X12, 192(AX)
  19713. MOVOU X13, 208(AX)
  19714. MOVOU X14, 224(AX)
  19715. MOVOU X15, 240(AX)
  19716. CMPQ DX, $0x00000100
  19717. LEAQ 256(CX), CX
  19718. LEAQ 256(AX), AX
  19719. JGE emit_lit_memmove_standalone_memmove_move_256through2048
  19720. JMP emit_lit_memmove_standalone_memmove_tail
  19721. emit_lit_memmove_standalone_memmove_avxUnaligned:
  19722. LEAQ (CX)(DX*1), SI
  19723. MOVQ AX, R8
  19724. MOVOU -128(SI), X5
  19725. MOVOU -112(SI), X6
  19726. MOVQ $0x00000080, BP
  19727. ANDQ $0xffffffe0, AX
  19728. ADDQ $0x20, AX
  19729. MOVOU -96(SI), X7
  19730. MOVOU -80(SI), X8
  19731. MOVQ AX, DI
  19732. SUBQ R8, DI
  19733. MOVOU -64(SI), X9
  19734. MOVOU -48(SI), X10
  19735. SUBQ DI, DX
  19736. MOVOU -32(SI), X11
  19737. MOVOU -16(SI), X12
  19738. VMOVDQU (CX), Y4
  19739. ADDQ DI, CX
  19740. SUBQ BP, DX
  19741. emit_lit_memmove_standalone_memmove_gobble_128_loop:
  19742. VMOVDQU (CX), Y0
  19743. VMOVDQU 32(CX), Y1
  19744. VMOVDQU 64(CX), Y2
  19745. VMOVDQU 96(CX), Y3
  19746. ADDQ BP, CX
  19747. VMOVDQA Y0, (AX)
  19748. VMOVDQA Y1, 32(AX)
  19749. VMOVDQA Y2, 64(AX)
  19750. VMOVDQA Y3, 96(AX)
  19751. ADDQ BP, AX
  19752. SUBQ BP, DX
  19753. JA emit_lit_memmove_standalone_memmove_gobble_128_loop
  19754. ADDQ BP, DX
  19755. ADDQ AX, DX
  19756. VMOVDQU Y4, (R8)
  19757. VZEROUPPER
  19758. MOVOU X5, -128(DX)
  19759. MOVOU X6, -112(DX)
  19760. MOVOU X7, -96(DX)
  19761. MOVOU X8, -80(DX)
  19762. MOVOU X9, -64(DX)
  19763. MOVOU X10, -48(DX)
  19764. MOVOU X11, -32(DX)
  19765. MOVOU X12, -16(DX)
  19766. emit_literal_end_avx_standalone:
  19767. MOVQ BX, ret+48(FP)
  19768. RET
  // func emitRepeat(dst []byte, offset int, length int) int
  //
  // emitRepeat writes one or more repeat codes covering `length` bytes to the
  // start of dst and returns the number of bytes written.
  //
  // Register roles:
  //   AX = write cursor into dst
  //   BX = bytes written so far (becomes the return value)
  //   CX = offset  (only the low 32 bits are examined)
  //   DX = length  (only the low 32 bits are examined)
  //   SI = scratch
  //
  // dst is never bounds-checked here: the caller must supply enough room
  // (each emitted code is 2 to 5 bytes).
  //
  // SI is used as the scratch register instead of BP. This function has a
  // zero-size frame, so the assembler emits no BP save/restore; writing BP
  // here would overwrite the caller's frame pointer.
  TEXT ·emitRepeat(SB), NOSPLIT, $0-48
      XORQ BX, BX                        // nothing written yet
      MOVQ dst_base+0(FP), AX
      MOVQ offset+24(FP), CX
      MOVQ length+32(FP), DX

  emit_repeat_again_standalone:
      MOVL DX, SI                        // SI = length as passed in / left over
      LEAL -4(DX), DX                    // DX = length - 4
      CMPL SI, $0x08
      JLE  repeat_two_standalone         // length <= 8: 2-byte code
      CMPL SI, $0x0c
      JGE  cant_repeat_two_offset_standalone
      CMPL CX, $0x00000800
      JLT  repeat_two_offset_standalone  // length < 12 and offset < 2048

  cant_repeat_two_offset_standalone:
      CMPL DX, $0x00000104
      JLT  repeat_three_standalone
      CMPL DX, $0x00010100
      JLT  repeat_four_standalone
      CMPL DX, $0x0100ffff
      JLT  repeat_five_standalone

      // Too long for a single 5-byte code: emit 1d 00 fb ff ff, subtract
      // 16842747 (0x0100fffb) from DX and go round again with what is left.
      LEAL -16842747(DX), DX
      MOVW $0x001d, (AX)
      MOVW $0xfffb, 2(AX)
      MOVB $0xff, 4(AX)
      ADDQ $0x05, AX
      ADDQ $0x05, BX
      JMP  emit_repeat_again_standalone

  repeat_five_standalone:
      // 5 bytes: 1d 00, then the low 24 bits of (DX - 65536), little-endian.
      LEAL -65536(DX), DX
      MOVL DX, CX                        // CX (offset) is dead from here on
      MOVW $0x001d, (AX)
      MOVW DX, 2(AX)
      SARL $0x10, CX
      MOVB CL, 4(AX)
      ADDQ $0x05, BX
      ADDQ $0x05, AX
      JMP  gen_emit_repeat_end

  repeat_four_standalone:
      // 4 bytes: 19 00, then the low 16 bits of (DX - 256), little-endian.
      LEAL -256(DX), DX
      MOVW $0x0019, (AX)
      MOVW DX, 2(AX)
      ADDQ $0x04, BX
      ADDQ $0x04, AX
      JMP  gen_emit_repeat_end

  repeat_three_standalone:
      // 3 bytes: 15 00, then the low byte of (DX - 4).
      LEAL -4(DX), DX
      MOVW $0x0015, (AX)
      MOVB DL, 2(AX)
      ADDQ $0x03, BX
      ADDQ $0x03, AX
      JMP  gen_emit_repeat_end

  repeat_two_standalone:
      // 2 bytes: the low 16 bits of ((length-4)<<2 | 1), little-endian.
      SHLL $0x02, DX
      ORL  $0x01, DX
      MOVW DX, (AX)
      ADDQ $0x02, BX
      ADDQ $0x02, AX
      JMP  gen_emit_repeat_end

  repeat_two_offset_standalone:
      // 2 bytes: byte 0 = low byte of (1 + 4*(length-4)) | ((offset>>8)<<5),
      //          byte 1 = low byte of offset.
      XORQ SI, SI
      LEAL 1(SI)(DX*4), DX
      MOVB CL, 1(AX)
      SARL $0x08, CX
      SHLL $0x05, CX
      ORL  CX, DX
      MOVB DL, (AX)
      ADDQ $0x02, BX
      ADDQ $0x02, AX

  gen_emit_repeat_end:
      MOVQ BX, ret+40(FP)
      RET
  // func emitCopy(dst []byte, offset int, length int) int
  //
  // emitCopy writes the encoding of a copy of `length` bytes at distance
  // `offset` to the start of dst and returns the number of bytes written.
  // Offsets below 0x10000 are stored in 1 or 2 bytes; larger offsets are
  // stored in 4 bytes. When the length exceeds 64, a maximal leading copy is
  // emitted and the remainder is encoded with the repeat codes below.
  //
  // Register roles:
  //   AX = write cursor into dst
  //   BX = bytes written so far (becomes the return value)
  //   CX = offset  (only the low 32 bits are examined)
  //   DX = remaining length (only the low 32 bits are examined)
  //   SI = scratch
  //
  // dst is never bounds-checked here: the caller must supply enough room.
  //
  // SI is used as the scratch register instead of BP. This function has a
  // zero-size frame, so the assembler emits no BP save/restore; writing BP
  // here would overwrite the caller's frame pointer.
  TEXT ·emitCopy(SB), NOSPLIT, $0-48
      XORQ BX, BX                        // nothing written yet
      MOVQ dst_base+0(FP), AX
      MOVQ offset+24(FP), CX
      MOVQ length+32(FP), DX
      CMPL CX, $0x00010000
      JL   two_byte_offset_standalone    // offset fits in 16 bits

  four_bytes_loop_back_standalone:
      CMPL DX, $0x40
      JLE  four_bytes_remain_standalone

      // length > 64: emit ff + 4-byte offset, which accounts for 64 bytes.
      MOVB $0xff, (AX)
      MOVL CX, 1(AX)
      LEAL -64(DX), DX
      ADDQ $0x05, BX
      ADDQ $0x05, AX
      CMPL DX, $0x04
      JL   four_bytes_remain_standalone  // fewer than 4 left: plain copy

  // Encode the remaining length as a repeat.
  emit_repeat_again_standalone_emit_copy:
      MOVL DX, SI                        // SI = remaining length
      LEAL -4(DX), DX                    // DX = remaining length - 4
      CMPL SI, $0x08
      JLE  repeat_two_standalone_emit_copy
      CMPL SI, $0x0c
      JGE  cant_repeat_two_offset_standalone_emit_copy
      CMPL CX, $0x00000800
      JLT  repeat_two_offset_standalone_emit_copy

  cant_repeat_two_offset_standalone_emit_copy:
      CMPL DX, $0x00000104
      JLT  repeat_three_standalone_emit_copy
      CMPL DX, $0x00010100
      JLT  repeat_four_standalone_emit_copy
      CMPL DX, $0x0100ffff
      JLT  repeat_five_standalone_emit_copy

      // Too long for a single 5-byte code: emit 1d 00 fb ff ff, subtract
      // 16842747 (0x0100fffb) from DX and go round again.
      LEAL -16842747(DX), DX
      MOVW $0x001d, (AX)
      MOVW $0xfffb, 2(AX)
      MOVB $0xff, 4(AX)
      ADDQ $0x05, AX
      ADDQ $0x05, BX
      JMP  emit_repeat_again_standalone_emit_copy

  repeat_five_standalone_emit_copy:
      // 5 bytes: 1d 00, then the low 24 bits of (DX - 65536), little-endian.
      LEAL -65536(DX), DX
      MOVL DX, CX                        // CX (offset) is dead from here on
      MOVW $0x001d, (AX)
      MOVW DX, 2(AX)
      SARL $0x10, CX
      MOVB CL, 4(AX)
      ADDQ $0x05, BX
      ADDQ $0x05, AX
      JMP  gen_emit_copy_end

  repeat_four_standalone_emit_copy:
      // 4 bytes: 19 00, then the low 16 bits of (DX - 256), little-endian.
      LEAL -256(DX), DX
      MOVW $0x0019, (AX)
      MOVW DX, 2(AX)
      ADDQ $0x04, BX
      ADDQ $0x04, AX
      JMP  gen_emit_copy_end

  repeat_three_standalone_emit_copy:
      // 3 bytes: 15 00, then the low byte of (DX - 4).
      LEAL -4(DX), DX
      MOVW $0x0015, (AX)
      MOVB DL, 2(AX)
      ADDQ $0x03, BX
      ADDQ $0x03, AX
      JMP  gen_emit_copy_end

  repeat_two_standalone_emit_copy:
      // 2 bytes: the low 16 bits of (DX<<2 | 1), little-endian.
      SHLL $0x02, DX
      ORL  $0x01, DX
      MOVW DX, (AX)
      ADDQ $0x02, BX
      ADDQ $0x02, AX
      JMP  gen_emit_copy_end

  repeat_two_offset_standalone_emit_copy:
      // 2 bytes: byte 0 = low byte of (1 + 4*DX) | ((offset>>8)<<5),
      //          byte 1 = low byte of offset.
      XORQ SI, SI
      LEAL 1(SI)(DX*4), DX
      MOVB CL, 1(AX)
      SARL $0x08, CX
      SHLL $0x05, CX
      ORL  CX, DX
      MOVB DL, (AX)
      ADDQ $0x02, BX
      ADDQ $0x02, AX
      JMP  gen_emit_copy_end
      JMP  four_bytes_loop_back_standalone // unreachable: follows an unconditional JMP

  four_bytes_remain_standalone:
      TESTL DX, DX
      JZ    gen_emit_copy_end            // nothing left to encode

      // 5 bytes: byte 0 = low byte of (4*DX - 1), then the 4-byte offset.
      // Only the low byte of SI is initialised; only DL is stored, so the
      // undefined upper bits of SI never reach memory.
      MOVB  $0x03, SI
      LEAL  -4(SI)(DX*4), DX
      MOVB  DL, (AX)
      MOVL  CX, 1(AX)
      ADDQ  $0x05, BX
      ADDQ  $0x05, AX
      JMP   gen_emit_copy_end

  two_byte_offset_standalone:
      CMPL DX, $0x40
      JLE  two_byte_offset_short_standalone

      // length > 64: emit ee + 2-byte offset, which accounts for 60 bytes.
      MOVB $0xee, (AX)
      MOVW CX, 1(AX)
      LEAL -60(DX), DX
      ADDQ $0x03, AX
      ADDQ $0x03, BX

  // Encode the remaining length as a repeat.
  emit_repeat_again_standalone_emit_copy_short:
      MOVL DX, SI                        // SI = remaining length
      LEAL -4(DX), DX                    // DX = remaining length - 4
      CMPL SI, $0x08
      JLE  repeat_two_standalone_emit_copy_short
      CMPL SI, $0x0c
      JGE  cant_repeat_two_offset_standalone_emit_copy_short
      CMPL CX, $0x00000800
      JLT  repeat_two_offset_standalone_emit_copy_short

  cant_repeat_two_offset_standalone_emit_copy_short:
      CMPL DX, $0x00000104
      JLT  repeat_three_standalone_emit_copy_short
      CMPL DX, $0x00010100
      JLT  repeat_four_standalone_emit_copy_short
      CMPL DX, $0x0100ffff
      JLT  repeat_five_standalone_emit_copy_short

      // Too long for a single 5-byte code: emit 1d 00 fb ff ff, subtract
      // 16842747 (0x0100fffb) from DX and go round again.
      LEAL -16842747(DX), DX
      MOVW $0x001d, (AX)
      MOVW $0xfffb, 2(AX)
      MOVB $0xff, 4(AX)
      ADDQ $0x05, AX
      ADDQ $0x05, BX
      JMP  emit_repeat_again_standalone_emit_copy_short

  repeat_five_standalone_emit_copy_short:
      // 5 bytes: 1d 00, then the low 24 bits of (DX - 65536), little-endian.
      LEAL -65536(DX), DX
      MOVL DX, CX                        // CX (offset) is dead from here on
      MOVW $0x001d, (AX)
      MOVW DX, 2(AX)
      SARL $0x10, CX
      MOVB CL, 4(AX)
      ADDQ $0x05, BX
      ADDQ $0x05, AX
      JMP  gen_emit_copy_end

  repeat_four_standalone_emit_copy_short:
      // 4 bytes: 19 00, then the low 16 bits of (DX - 256), little-endian.
      LEAL -256(DX), DX
      MOVW $0x0019, (AX)
      MOVW DX, 2(AX)
      ADDQ $0x04, BX
      ADDQ $0x04, AX
      JMP  gen_emit_copy_end

  repeat_three_standalone_emit_copy_short:
      // 3 bytes: 15 00, then the low byte of (DX - 4).
      LEAL -4(DX), DX
      MOVW $0x0015, (AX)
      MOVB DL, 2(AX)
      ADDQ $0x03, BX
      ADDQ $0x03, AX
      JMP  gen_emit_copy_end

  repeat_two_standalone_emit_copy_short:
      // 2 bytes: the low 16 bits of (DX<<2 | 1), little-endian.
      SHLL $0x02, DX
      ORL  $0x01, DX
      MOVW DX, (AX)
      ADDQ $0x02, BX
      ADDQ $0x02, AX
      JMP  gen_emit_copy_end

  repeat_two_offset_standalone_emit_copy_short:
      // 2 bytes: byte 0 = low byte of (1 + 4*DX) | ((offset>>8)<<5),
      //          byte 1 = low byte of offset.
      XORQ SI, SI
      LEAL 1(SI)(DX*4), DX
      MOVB CL, 1(AX)
      SARL $0x08, CX
      SHLL $0x05, CX
      ORL  CX, DX
      MOVB DL, (AX)
      ADDQ $0x02, BX
      ADDQ $0x02, AX
      JMP  gen_emit_copy_end
      JMP  two_byte_offset_standalone    // unreachable: follows an unconditional JMP

  two_byte_offset_short_standalone:
      CMPL DX, $0x0c
      JGE  emit_copy_three_standalone    // length >= 12 needs the 3-byte form
      CMPL CX, $0x00000800
      JGE  emit_copy_three_standalone    // offset >= 2048 needs the 3-byte form

      // 2 bytes: byte 0 = low byte of (4*DX - 15) | ((offset>>8)<<5),
      //          byte 1 = low byte of offset.
      // Only the low byte of SI is initialised; only DL is stored.
      MOVB $0x01, SI
      LEAL -16(SI)(DX*4), DX
      MOVB CL, 1(AX)
      SHRL $0x08, CX
      SHLL $0x05, CX
      ORL  CX, DX
      MOVB DL, (AX)
      ADDQ $0x02, BX
      ADDQ $0x02, AX
      JMP  gen_emit_copy_end

  emit_copy_three_standalone:
      // 3 bytes: byte 0 = low byte of (4*DX - 2), then the 2-byte offset.
      // Only the low byte of SI is initialised; only DL is stored.
      MOVB $0x02, SI
      LEAL -4(SI)(DX*4), DX
      MOVB DL, (AX)
      MOVW CX, 1(AX)
      ADDQ $0x03, BX
      ADDQ $0x03, AX

  gen_emit_copy_end:
      MOVQ BX, ret+40(FP)
      RET
  // func emitCopyNoRepeat(dst []byte, offset int, length int) int
  //
  // emitCopyNoRepeat writes the encoding of a copy of `length` bytes at
  // distance `offset` to the start of dst, using copy codes only (no repeat
  // codes), and returns the number of bytes written. Offsets below 0x10000
  // are stored in 1 or 2 bytes; larger offsets are stored in 4 bytes.
  //
  // Register roles:
  //   AX = write cursor into dst
  //   BX = bytes written so far (becomes the return value)
  //   CX = offset  (only the low 32 bits are examined)
  //   DX = remaining length (only the low 32 bits are examined)
  //   SI = scratch
  //
  // dst is never bounds-checked here: the caller must supply enough room.
  //
  // SI is used as the scratch register instead of BP. This function has a
  // zero-size frame, so the assembler emits no BP save/restore; writing BP
  // here would overwrite the caller's frame pointer.
  TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
      XORQ BX, BX                        // nothing written yet
      MOVQ dst_base+0(FP), AX
      MOVQ offset+24(FP), CX
      MOVQ length+32(FP), DX
      CMPL CX, $0x00010000
      JL   two_byte_offset_standalone_snappy // offset fits in 16 bits

  four_bytes_loop_back_standalone_snappy:
      CMPL DX, $0x40
      JLE  four_bytes_remain_standalone_snappy

      // length > 64: emit ff + 4-byte offset, which accounts for 64 bytes.
      MOVB $0xff, (AX)
      MOVL CX, 1(AX)
      LEAL -64(DX), DX
      ADDQ $0x05, BX
      ADDQ $0x05, AX
      CMPL DX, $0x04
      JL   four_bytes_remain_standalone_snappy
      JMP  four_bytes_loop_back_standalone_snappy

  four_bytes_remain_standalone_snappy:
      TESTL DX, DX
      JZ    gen_emit_copy_end_snappy     // nothing left to encode

      // 5 bytes: byte 0 = low byte of (4*DX - 1), then the 4-byte offset.
      // Only the low byte of SI is initialised; only DL is stored, so the
      // undefined upper bits of SI never reach memory.
      MOVB  $0x03, SI
      LEAL  -4(SI)(DX*4), DX
      MOVB  DL, (AX)
      MOVL  CX, 1(AX)
      ADDQ  $0x05, BX
      ADDQ  $0x05, AX
      JMP   gen_emit_copy_end_snappy

  two_byte_offset_standalone_snappy:
      CMPL DX, $0x40
      JLE  two_byte_offset_short_standalone_snappy

      // length > 64: emit ee + 2-byte offset, which accounts for 60 bytes,
      // then loop for the rest.
      MOVB $0xee, (AX)
      MOVW CX, 1(AX)
      LEAL -60(DX), DX
      ADDQ $0x03, AX
      ADDQ $0x03, BX
      JMP  two_byte_offset_standalone_snappy

  two_byte_offset_short_standalone_snappy:
      CMPL DX, $0x0c
      JGE  emit_copy_three_standalone_snappy // length >= 12 needs the 3-byte form
      CMPL CX, $0x00000800
      JGE  emit_copy_three_standalone_snappy // offset >= 2048 needs the 3-byte form

      // 2 bytes: byte 0 = low byte of (4*DX - 15) | ((offset>>8)<<5),
      //          byte 1 = low byte of offset.
      // Only the low byte of SI is initialised; only DL is stored.
      MOVB $0x01, SI
      LEAL -16(SI)(DX*4), DX
      MOVB CL, 1(AX)
      SHRL $0x08, CX
      SHLL $0x05, CX
      ORL  CX, DX
      MOVB DL, (AX)
      ADDQ $0x02, BX
      ADDQ $0x02, AX
      JMP  gen_emit_copy_end_snappy

  emit_copy_three_standalone_snappy:
      // 3 bytes: byte 0 = low byte of (4*DX - 2), then the 2-byte offset.
      // Only the low byte of SI is initialised; only DL is stored.
      MOVB $0x02, SI
      LEAL -4(SI)(DX*4), DX
      MOVB DL, (AX)
      MOVW CX, 1(AX)
      ADDQ $0x03, BX
      ADDQ $0x03, AX

  gen_emit_copy_end_snappy:
      MOVQ BX, ret+40(FP)
      RET
  // func matchLen(a []byte, b []byte) int
  //
  // matchLen returns the number of leading bytes at which a and b agree,
  // scanning at most len(a) bytes.
  //
  // Only a's length is read; b's length is never consulted, so b must be at
  // least as long as a (NOTE(review): confirm every caller guarantees this).
  // The length is handled as a signed 32-bit value.
  //
  // Register roles:
  //   AX = base of a
  //   CX = base of b
  //   DX = bytes still to compare
  //   SI = current index == matched length so far (becomes the return value)
  //   BX = scratch
  //
  // SI holds the index instead of BP. This function has a zero-size frame,
  // so the assembler emits no BP save/restore; writing BP here would
  // overwrite the caller's frame pointer.
  TEXT ·matchLen(SB), NOSPLIT, $0-56
      MOVQ a_base+0(FP), AX
      MOVQ b_base+24(FP), CX
      MOVQ a_len+8(FP), DX
      XORL SI, SI                        // matched length = 0
      CMPL DX, $0x08
      JL   matchlen_single_standalone    // fewer than 8 bytes: go bytewise

  // Compare 8 bytes per iteration.
  matchlen_loopback_standalone:
      MOVQ  (AX)(SI*1), BX
      XORQ  (CX)(SI*1), BX               // non-zero bits mark differing bits
      TESTQ BX, BX
      JZ    matchlen_loop_standalone     // all 8 bytes equal

      // Mismatch inside this word: index of lowest set bit / 8 is the number
      // of equal bytes before the first differing one.
      BSFQ  BX, BX
      SARQ  $0x03, BX
      LEAL  (SI)(BX*1), SI
      JMP   gen_match_len_end

  matchlen_loop_standalone:
      LEAL -8(DX), DX
      LEAL 8(SI), SI
      CMPL DX, $0x08
      JGE  matchlen_loopback_standalone

  // Fewer than 8 bytes remain: compare one byte at a time.
  matchlen_single_standalone:
      TESTL DX, DX
      JZ    gen_match_len_end

  matchlen_single_loopback_standalone:
      MOVB (AX)(SI*1), BL
      CMPB (CX)(SI*1), BL
      JNE  gen_match_len_end
      LEAL 1(SI), SI
      DECL DX
      JNZ  matchlen_single_loopback_standalone

  gen_match_len_end:
      MOVQ SI, ret+48(FP)                // upper 32 bits of SI are zero (32-bit writes only)
      RET