#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in the pure AltiVec/VMX way [where data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine: 9x improvement on little-endian and 12x on
# big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0
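
# Usage note: this file is a "perlasm" generator, not assembly proper.
# It is run with a flavour argument and an output path, e.g. (illustrative
# invocation; flavour names follow the ppc-xlate.pl conventions):
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s
#
# The flavour selects word size, ABI and endianness below; the output
# file name is handed to the ppc-xlate.pl translator via the STDOUT pipe.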

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# "or" rather than "||": with "||" the check bound to the argument
# string (always true), so a failed pipe open was never caught.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort		# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f	# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask		# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256

.align	4
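# Each Loop128 iteration derives one 128-bit round key. A rough scalar
# sketch of what the vector code computes (illustrative pseudo-code in
# FIPS-197 notation, not part of the generated assembly):
#
#	t      = SubWord(RotWord(w[i-1])) ^ rcon;  # vperm + vcipherlast
#	w[i]   = w[i-4] ^ t;                       # vsldoi/vxor ladder
#	w[i+1] = w[i-3] ^ w[i];                    # folds the previous
#	w[i+2] = w[i-2] ^ w[i+1];                  # round key in, 32
#	w[i+3] = w[i-1] ^ w[i+2];                  # bits at a time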
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds
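
# The schedule itself is unchanged; vncipher simply consumes the round
# keys in reverse order, so the loop below swaps key i with key rounds-i
# in place, 16 bytes (four words) at a time. Roughly (illustrative
# pseudo-code, not part of the generated assembly):
#
#	for (i = 0, j = rounds; i < j; i++, j--)
#		swap(rk[i], rk[j]);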
Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}

#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds
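
	# The loop below is unrolled to two rounds per iteration, which is
	# why CTR was loaded with rounds/2-1 above: the initial AddRoundKey
	# and the final two rounds (the last via the "cipherlast" form) are
	# handled outside the loop.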
Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}

#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));

$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec
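
# CBC recurrences realized below (illustrative, SP 800-38A notation):
# encryption computes C[i] = E(K, P[i] ^ C[i-1]) with C[0] = IV and is
# inherently serial; decryption computes P[i] = D(K, C[i]) ^ C[i-1],
# where every block is independent, which is what the eight-wide
# _aesp8_cbc_decrypt8x path below exploits.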
Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for the first few round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total
	subi	$len,$len,128		# bias

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_cbc_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_cbc_dec_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	#lvx	$inptail,0,$inp		# "caller" already did this
	#addi	$inp,$inp,15		# 15 is not typo
	subi	$inp,$inp,15		# undo "caller"
	le?li	$idx,8
	lvx_u	$in0,$x00,$inp		# load first 8 "words"
	le?lvsl	$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u	$in1,$x10,$inp
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u	$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor	$out0,$in0,$rndkey0
	lvx_u	$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor	$out1,$in1,$rndkey0
	lvx_u	$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor	$out2,$in2,$rndkey0
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor	$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out5,$in5,$rndkey0
	vxor	$out6,$in6,$rndkey0
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	b	Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x

	subic	$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and	r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
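
	# Branchless tail adjustment: subic set CA from the len-128
	# subtraction, subfe turned it into r0 = 0 (another full batch
	# ahead) or r0 = -1 (tail), and the and/add pair amounts to
	# (illustrative C, not part of the generated code):
	#
	#	if (len < 0) inp += len;
	#
	# i.e. back the input pointer up so the final eight loads end
	# exactly at the last block.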
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor	$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor	$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u	$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u	$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u	$in6,$x60,$inp
	vmr	$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	vxor	$out6,$in6,$rndkey0
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	beq	Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.	$len,$len,128
	beq	Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor	$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	cmplwi	$len,32			# switch($len)
	blt	Lcbc_dec8x_one
	nop
	beq	Lcbc_dec8x_two
	cmplwi	$len,64
	blt	Lcbc_dec8x_three
	nop
	beq	Lcbc_dec8x_four
	cmplwi	$len,96
	blt	Lcbc_dec8x_five
	nop
	beq	Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x50,$out
	stvx_u	$out7,$x60,$out
	addi	$out,$out,0x70
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x40,$out
	stvx_u	$out7,$x50,$out
	addi	$out,$out,0x60
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x30,$out
	stvx_u	$out7,$x40,$out
	addi	$out,$out,0x50
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x20,$out
	stvx_u	$out7,$x30,$out
	addi	$out,$out,0x40
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x10,$out
	stvx_u	$out7,$x20,$out
	addi	$out,$out,0x30
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x00,$out
	stvx_u	$out7,$x10,$out
	addi	$out,$out,0x20
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr	$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out7,0,$out
	addi	$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u	$ivec,0,$ivp		# write [unaligned] iv

	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm
	vsldoi	$one,$rndkey0,$one,1
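	# vspltisb+vsldoi materialize the 128-bit constant 1 in $one:
	# 1 is splat into every byte, then vsldoi keeps 15 bytes of the
	# zero vector $rndkey0 followed by one byte of the splat, i.e.
	# 0x00..01. vadduqm below then increments the counter block as
	# a single 128-bit integer.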

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1

	${UCMP}i	$len,8
	bge	_aesp8_ctr32_encrypt8x

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	lvx	$rndkey0,0,$key
	mtctr	$rounds
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	b	Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_ctr32_enc

	vadduqm	$ivec,$ivec,$one
	vmr	$dat,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	subic.	$len,$len,1		# blocks--

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	vperm	$dat,$dat,$inptail,$inpperm
	li	$idx,16
	?vperm	$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx	$rndkey0,0,$key
	vxor	$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inout,$outperm
	vsel	$dat,$outhead,$inout,$outmask
	mtctr	$rounds
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr	$outhead,$inout
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	stvx	$dat,0,$out
	addi	$out,$out,16
	bne	Loop_ctr32_enc

	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___

#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for the first few round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vadduqm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4
	vadduqm	$out1,$ivec,$one	# counter values ...
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0
  1336. mtctr $rounds
  1337. b Loop_ctr32_enc8x
  1338. .align 5
  1339. Loop_ctr32_enc8x:
  1340. vcipher $out0,$out0,v24
  1341. vcipher $out1,$out1,v24
  1342. vcipher $out2,$out2,v24
  1343. vcipher $out3,$out3,v24
  1344. vcipher $out4,$out4,v24
  1345. vcipher $out5,$out5,v24
  1346. vcipher $out6,$out6,v24
  1347. vcipher $out7,$out7,v24
  1348. Loop_ctr32_enc8x_middle:
  1349. lvx v24,$x20,$key_ # round[3]
  1350. addi $key_,$key_,0x20
  1351. vcipher $out0,$out0,v25
  1352. vcipher $out1,$out1,v25
  1353. vcipher $out2,$out2,v25
  1354. vcipher $out3,$out3,v25
  1355. vcipher $out4,$out4,v25
  1356. vcipher $out5,$out5,v25
  1357. vcipher $out6,$out6,v25
  1358. vcipher $out7,$out7,v25
  1359. lvx v25,$x10,$key_ # round[4]
  1360. bdnz Loop_ctr32_enc8x
  1361. subic r11,$len,256 # $len-256, borrow $key_
  1362. vcipher $out0,$out0,v24
  1363. vcipher $out1,$out1,v24
  1364. vcipher $out2,$out2,v24
  1365. vcipher $out3,$out3,v24
  1366. vcipher $out4,$out4,v24
  1367. vcipher $out5,$out5,v24
  1368. vcipher $out6,$out6,v24
  1369. vcipher $out7,$out7,v24
  1370. subfe r0,r0,r0 # borrow?-1:0
  1371. vcipher $out0,$out0,v25
  1372. vcipher $out1,$out1,v25
  1373. vcipher $out2,$out2,v25
  1374. vcipher $out3,$out3,v25
  1375. vcipher $out4,$out4,v25
  1376. vcipher $out5,$out5,v25
  1377. vcipher $out6,$out6,v25
  1378. vcipher $out7,$out7,v25
  1379. and r0,r0,r11
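	# at this point r0 = ($len < 256) ? ($len-256) : 0; adding it to
	# $inp below backs the pointer up for the final short batch (the
	# same subic/subfe/and idiom recurs in the XTS code further down)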
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such a
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10

Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                             const AES_KEY *key1, const AES_KEY *key2,	#
#                             [const] unsigned char iv[16]);		#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# the input tweak value is assumed to be encrypted already, and the	#
# last tweak value, one suitable for a consecutive call on the same	#
# chunk of data, is written back to the original buffer. In addition,	#
# in "tweak chaining" mode only complete input blocks are processed.	#
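#
# The per-block tweak update used throughout is multiplication by x in
# GF(2^128) with the XTS reduction polynomial x^128+x^7+x^2+x+1. As a
# reading aid (comments only, nothing is emitted), the recurring
# five-instruction sequence computes:
#
#	mask  = per-byte arithmetic shift right by 7	# vsrab: msb -> 0xFF/0x00
#	mask  = mask rotated by one byte		# vsldoi ...,15
#	mask &= 0x870101..01				# vand: the 0x87 lane reduces
#	tweak = per-byte tweak + tweak			# vaddubm: bytes doubled
#	tweak ^= mask					# vxor: re-insert carries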
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
my $taillen = $key2;

($inp,$idx) = ($idx,$inp);	# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff0
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0

	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_enc_no_key2

	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0			# don't chain the tweak
	b	Lxts_enc

Lxts_enc_no_key2:
	li	$idx,-16
	and	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_enc:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven		# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge	_aesp8_xts_encrypt6x

	andi.	$taillen,$len,15
	subic	r0,$len,32
	subi	$taillen,$taillen,16
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0

	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	b	Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16

	subic.	$len,$len,16
	beq	Lxts_enc_done

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16

	subic	r0,$len,32
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$output,$output,$rndkey0	# just in case $len<16
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_enc

	vxor	$output,$output,$tweak
	lvsr	$inpperm,0,$len		# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail

	subi	r11,$out,17
	subi	$out,$out,16
	mtctr	$len
	li	$len,16
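	# ciphertext stealing: copy the first $len bytes of the block
	# just written forward to form the final partial output; $inout
	# was rebuilt above (vsel) from the partial plaintext plus the
	# stolen ciphertext tail and goes through the cipher once more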
Loop_xts_enc_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_enc_steal

	mtctr	$rounds
	b	Loop_xts_enc		# one more time...

Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc_ret

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_enc_ret:
	mtspr	256,r12			# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl	.${prefix}_xts_decrypt
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff8
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0

	andi.	r0,$len,15
	neg	r0,r0
	andi.	r0,r0,16
	sub	$len,$len,r0

	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_dec_no_key2

	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0			# don't chain the tweak
	b	Lxts_dec

Lxts_dec_no_key2:
	neg	$idx,$len
	andi.	$idx,$idx,15
	add	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_dec:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven		# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge	_aesp8_xts_decrypt6x

	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds

	${UCMP}i	$len,16
	blt	Ltail_xts_dec
	be?b	Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16

	subic.	$len,$len,16
	beq	Lxts_dec_done

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_dec

Ltail_xts_dec:
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak1,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak1,$tweak1,$tmp

	subi	$inp,$inp,16
	add	$inp,$inp,$len

	vxor	$inout,$inout,$tweak	# :-(
	vxor	$inout,$inout,$tweak1	# :-)
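	# decrypt-side stealing processes the last full block with the
	# *next* tweak value: the :-( line undoes the earlier xor with
	# $tweak, the :-) line applies $tweak1 instead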
Loop_xts_dec_short:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec_short

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	#addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm

	lvsr	$inpperm,0,$len		# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail

	vxor	$rndkey0,$rndkey0,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	subi	r11,$out,1
	mtctr	$len
	li	$len,16
Loop_xts_dec_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_dec_steal

	mtctr	$rounds
	b	Loop_xts_dec		# one more time...

Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq	Lxts_dec_ret

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_dec_ret:
	mtspr	256,r12			# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___

#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first few round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;
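
# Round keys are ?vperm-aligned once and spilled to the stack area at
# $sp+$FRAME; inside the loops v24/v25 act as a two-entry rotating
# window over that copy (the "off-load"/"pre-load"/"re-pre-load"
# comments below), while the six final round keys stay resident in
# v26-v31.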
  2136. $code.=<<___;
  2137. .align 5
  2138. _aesp8_xts_encrypt6x:
  2139. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  2140. mflr r11
  2141. li r7,`$FRAME+8*16+15`
  2142. li r3,`$FRAME+8*16+31`
  2143. $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
  2144. stvx v20,r7,$sp # ABI says so
  2145. addi r7,r7,32
  2146. stvx v21,r3,$sp
  2147. addi r3,r3,32
  2148. stvx v22,r7,$sp
  2149. addi r7,r7,32
  2150. stvx v23,r3,$sp
  2151. addi r3,r3,32
  2152. stvx v24,r7,$sp
  2153. addi r7,r7,32
  2154. stvx v25,r3,$sp
  2155. addi r3,r3,32
  2156. stvx v26,r7,$sp
  2157. addi r7,r7,32
  2158. stvx v27,r3,$sp
  2159. addi r3,r3,32
  2160. stvx v28,r7,$sp
  2161. addi r7,r7,32
  2162. stvx v29,r3,$sp
  2163. addi r3,r3,32
  2164. stvx v30,r7,$sp
  2165. stvx v31,r3,$sp
  2166. li r0,-1
  2167. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  2168. li $x10,0x10
  2169. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2170. li $x20,0x20
  2171. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2172. li $x30,0x30
  2173. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2174. li $x40,0x40
  2175. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2176. li $x50,0x50
  2177. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2178. li $x60,0x60
  2179. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2180. li $x70,0x70
  2181. mtspr 256,r0
  2182. subi $rounds,$rounds,3 # -4 in total
  2183. lvx $rndkey0,$x00,$key1 # load key schedule
  2184. lvx v30,$x10,$key1
  2185. addi $key1,$key1,0x20
  2186. lvx v31,$x00,$key1
  2187. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  2188. addi $key_,$sp,$FRAME+15
  2189. mtctr $rounds
  2190. Load_xts_enc_key:
  2191. ?vperm v24,v30,v31,$keyperm
  2192. lvx v30,$x10,$key1
  2193. addi $key1,$key1,0x20
  2194. stvx v24,$x00,$key_ # off-load round[1]
  2195. ?vperm v25,v31,v30,$keyperm
  2196. lvx v31,$x00,$key1
  2197. stvx v25,$x10,$key_ # off-load round[2]
  2198. addi $key_,$key_,0x20
  2199. bdnz Load_xts_enc_key
  2200. lvx v26,$x10,$key1
  2201. ?vperm v24,v30,v31,$keyperm
  2202. lvx v27,$x20,$key1
  2203. stvx v24,$x00,$key_ # off-load round[3]
  2204. ?vperm v25,v31,v26,$keyperm
  2205. lvx v28,$x30,$key1
  2206. stvx v25,$x10,$key_ # off-load round[4]
  2207. addi $key_,$sp,$FRAME+15 # rewind $key_
  2208. ?vperm v26,v26,v27,$keyperm
  2209. lvx v29,$x40,$key1
  2210. ?vperm v27,v27,v28,$keyperm
  2211. lvx v30,$x50,$key1
  2212. ?vperm v28,v28,v29,$keyperm
  2213. lvx v31,$x60,$key1
  2214. ?vperm v29,v29,v30,$keyperm
  2215. lvx $twk5,$x70,$key1 # borrow $twk5
  2216. ?vperm v30,v30,v31,$keyperm
  2217. lvx v24,$x00,$key_ # pre-load round[1]
  2218. ?vperm v31,v31,$twk5,$keyperm
  2219. lvx v25,$x10,$key_ # pre-load round[2]
  2220. vperm $in0,$inout,$inptail,$inpperm
  2221. subi $inp,$inp,31 # undo "caller"
  2222. vxor $twk0,$tweak,$rndkey0
  2223. vsrab $tmp,$tweak,$seven # next tweak value
  2224. vaddubm $tweak,$tweak,$tweak
  2225. vsldoi $tmp,$tmp,$tmp,15
  2226. vand $tmp,$tmp,$eighty7
  2227. vxor $out0,$in0,$twk0
  2228. vxor $tweak,$tweak,$tmp
  2229. lvx_u $in1,$x10,$inp
  2230. vxor $twk1,$tweak,$rndkey0
  2231. vsrab $tmp,$tweak,$seven # next tweak value
  2232. vaddubm $tweak,$tweak,$tweak
  2233. vsldoi $tmp,$tmp,$tmp,15
  2234. le?vperm $in1,$in1,$in1,$leperm
  2235. vand $tmp,$tmp,$eighty7
  2236. vxor $out1,$in1,$twk1
  2237. vxor $tweak,$tweak,$tmp
  2238. lvx_u $in2,$x20,$inp
  2239. andi. $taillen,$len,15
  2240. vxor $twk2,$tweak,$rndkey0
  2241. vsrab $tmp,$tweak,$seven # next tweak value
  2242. vaddubm $tweak,$tweak,$tweak
  2243. vsldoi $tmp,$tmp,$tmp,15
  2244. le?vperm $in2,$in2,$in2,$leperm
  2245. vand $tmp,$tmp,$eighty7
  2246. vxor $out2,$in2,$twk2
  2247. vxor $tweak,$tweak,$tmp
  2248. lvx_u $in3,$x30,$inp
  2249. sub $len,$len,$taillen
  2250. vxor $twk3,$tweak,$rndkey0
  2251. vsrab $tmp,$tweak,$seven # next tweak value
  2252. vaddubm $tweak,$tweak,$tweak
  2253. vsldoi $tmp,$tmp,$tmp,15
  2254. le?vperm $in3,$in3,$in3,$leperm
  2255. vand $tmp,$tmp,$eighty7
  2256. vxor $out3,$in3,$twk3
  2257. vxor $tweak,$tweak,$tmp
  2258. lvx_u $in4,$x40,$inp
  2259. subi $len,$len,0x60
  2260. vxor $twk4,$tweak,$rndkey0
  2261. vsrab $tmp,$tweak,$seven # next tweak value
  2262. vaddubm $tweak,$tweak,$tweak
  2263. vsldoi $tmp,$tmp,$tmp,15
  2264. le?vperm $in4,$in4,$in4,$leperm
  2265. vand $tmp,$tmp,$eighty7
  2266. vxor $out4,$in4,$twk4
  2267. vxor $tweak,$tweak,$tmp
  2268. lvx_u $in5,$x50,$inp
  2269. addi $inp,$inp,0x60
  2270. vxor $twk5,$tweak,$rndkey0
  2271. vsrab $tmp,$tweak,$seven # next tweak value
  2272. vaddubm $tweak,$tweak,$tweak
  2273. vsldoi $tmp,$tmp,$tmp,15
  2274. le?vperm $in5,$in5,$in5,$leperm
  2275. vand $tmp,$tmp,$eighty7
  2276. vxor $out5,$in5,$twk5
  2277. vxor $tweak,$tweak,$tmp
  2278. vxor v31,v31,$rndkey0
  2279. mtctr $rounds
  2280. b Loop_xts_enc6x
  2281. .align 5
  2282. Loop_xts_enc6x:
  2283. vcipher $out0,$out0,v24
  2284. vcipher $out1,$out1,v24
  2285. vcipher $out2,$out2,v24
  2286. vcipher $out3,$out3,v24
  2287. vcipher $out4,$out4,v24
  2288. vcipher $out5,$out5,v24
  2289. lvx v24,$x20,$key_ # round[3]
  2290. addi $key_,$key_,0x20
  2291. vcipher $out0,$out0,v25
  2292. vcipher $out1,$out1,v25
  2293. vcipher $out2,$out2,v25
  2294. vcipher $out3,$out3,v25
  2295. vcipher $out4,$out4,v25
  2296. vcipher $out5,$out5,v25
  2297. lvx v25,$x10,$key_ # round[4]
  2298. bdnz Loop_xts_enc6x
  2299. subic $len,$len,96 # $len-=96
  2300. vxor $in0,$twk0,v31 # xor with last round key
  2301. vcipher $out0,$out0,v24
  2302. vcipher $out1,$out1,v24
  2303. vsrab $tmp,$tweak,$seven # next tweak value
  2304. vxor $twk0,$tweak,$rndkey0
  2305. vaddubm $tweak,$tweak,$tweak
  2306. vcipher $out2,$out2,v24
  2307. vcipher $out3,$out3,v24
  2308. vsldoi $tmp,$tmp,$tmp,15
  2309. vcipher $out4,$out4,v24
  2310. vcipher $out5,$out5,v24
  2311. subfe. r0,r0,r0 # borrow?-1:0
  2312. vand $tmp,$tmp,$eighty7
  2313. vcipher $out0,$out0,v25
  2314. vcipher $out1,$out1,v25
  2315. vxor $tweak,$tweak,$tmp
  2316. vcipher $out2,$out2,v25
  2317. vcipher $out3,$out3,v25
  2318. vxor $in1,$twk1,v31
  2319. vsrab $tmp,$tweak,$seven # next tweak value
  2320. vxor $twk1,$tweak,$rndkey0
  2321. vcipher $out4,$out4,v25
  2322. vcipher $out5,$out5,v25
  2323. and r0,r0,$len
  2324. vaddubm $tweak,$tweak,$tweak
  2325. vsldoi $tmp,$tmp,$tmp,15
  2326. vcipher $out0,$out0,v26
  2327. vcipher $out1,$out1,v26
  2328. vand $tmp,$tmp,$eighty7
  2329. vcipher $out2,$out2,v26
  2330. vcipher $out3,$out3,v26
  2331. vxor $tweak,$tweak,$tmp
  2332. vcipher $out4,$out4,v26
  2333. vcipher $out5,$out5,v26
  2334. add $inp,$inp,r0 # $inp is adjusted in such
  2335. # way that at exit from the
  2336. # loop inX-in5 are loaded
  2337. # with last "words"
  2338. vxor $in2,$twk2,v31
  2339. vsrab $tmp,$tweak,$seven # next tweak value
  2340. vxor $twk2,$tweak,$rndkey0
  2341. vaddubm $tweak,$tweak,$tweak
  2342. vcipher $out0,$out0,v27
  2343. vcipher $out1,$out1,v27
  2344. vsldoi $tmp,$tmp,$tmp,15
  2345. vcipher $out2,$out2,v27
  2346. vcipher $out3,$out3,v27
  2347. vand $tmp,$tmp,$eighty7
  2348. vcipher $out4,$out4,v27
  2349. vcipher $out5,$out5,v27
  2350. addi $key_,$sp,$FRAME+15 # rewind $key_
  2351. vxor $tweak,$tweak,$tmp
  2352. vcipher $out0,$out0,v28
  2353. vcipher $out1,$out1,v28
  2354. vxor $in3,$twk3,v31
  2355. vsrab $tmp,$tweak,$seven # next tweak value
  2356. vxor $twk3,$tweak,$rndkey0
  2357. vcipher $out2,$out2,v28
  2358. vcipher $out3,$out3,v28
  2359. vaddubm $tweak,$tweak,$tweak
  2360. vsldoi $tmp,$tmp,$tmp,15
  2361. vcipher $out4,$out4,v28
  2362. vcipher $out5,$out5,v28
  2363. lvx v24,$x00,$key_ # re-pre-load round[1]
  2364. vand $tmp,$tmp,$eighty7
  2365. vcipher $out0,$out0,v29
  2366. vcipher $out1,$out1,v29
  2367. vxor $tweak,$tweak,$tmp
  2368. vcipher $out2,$out2,v29
  2369. vcipher $out3,$out3,v29
  2370. vxor $in4,$twk4,v31
  2371. vsrab $tmp,$tweak,$seven # next tweak value
  2372. vxor $twk4,$tweak,$rndkey0
  2373. vcipher $out4,$out4,v29
  2374. vcipher $out5,$out5,v29
  2375. lvx v25,$x10,$key_ # re-pre-load round[2]
  2376. vaddubm $tweak,$tweak,$tweak
  2377. vsldoi $tmp,$tmp,$tmp,15
  2378. vcipher $out0,$out0,v30
  2379. vcipher $out1,$out1,v30
  2380. vand $tmp,$tmp,$eighty7
  2381. vcipher $out2,$out2,v30
  2382. vcipher $out3,$out3,v30
  2383. vxor $tweak,$tweak,$tmp
  2384. vcipher $out4,$out4,v30
  2385. vcipher $out5,$out5,v30
  2386. vxor $in5,$twk5,v31
  2387. vsrab $tmp,$tweak,$seven # next tweak value
  2388. vxor $twk5,$tweak,$rndkey0
  2389. vcipherlast $out0,$out0,$in0
  2390. lvx_u $in0,$x00,$inp # load next input block
  2391. vaddubm $tweak,$tweak,$tweak
  2392. vsldoi $tmp,$tmp,$tmp,15
  2393. vcipherlast $out1,$out1,$in1
  2394. lvx_u $in1,$x10,$inp
  2395. vcipherlast $out2,$out2,$in2
  2396. le?vperm $in0,$in0,$in0,$leperm
  2397. lvx_u $in2,$x20,$inp
  2398. vand $tmp,$tmp,$eighty7
  2399. vcipherlast $out3,$out3,$in3
  2400. le?vperm $in1,$in1,$in1,$leperm
  2401. lvx_u $in3,$x30,$inp
  2402. vcipherlast $out4,$out4,$in4
  2403. le?vperm $in2,$in2,$in2,$leperm
  2404. lvx_u $in4,$x40,$inp
  2405. vxor $tweak,$tweak,$tmp
  2406. vcipherlast $tmp,$out5,$in5 # last block might be needed
  2407. # in stealing mode
  2408. le?vperm $in3,$in3,$in3,$leperm
  2409. lvx_u $in5,$x50,$inp
  2410. addi $inp,$inp,0x60
  2411. le?vperm $in4,$in4,$in4,$leperm
  2412. le?vperm $in5,$in5,$in5,$leperm
  2413. le?vperm $out0,$out0,$out0,$leperm
  2414. le?vperm $out1,$out1,$out1,$leperm
  2415. stvx_u $out0,$x00,$out # store output
  2416. vxor $out0,$in0,$twk0
  2417. le?vperm $out2,$out2,$out2,$leperm
  2418. stvx_u $out1,$x10,$out
  2419. vxor $out1,$in1,$twk1
  2420. le?vperm $out3,$out3,$out3,$leperm
  2421. stvx_u $out2,$x20,$out
  2422. vxor $out2,$in2,$twk2
  2423. le?vperm $out4,$out4,$out4,$leperm
  2424. stvx_u $out3,$x30,$out
  2425. vxor $out3,$in3,$twk3
  2426. le?vperm $out5,$tmp,$tmp,$leperm
  2427. stvx_u $out4,$x40,$out
  2428. vxor $out4,$in4,$twk4
  2429. le?stvx_u $out5,$x50,$out
  2430. be?stvx_u $tmp, $x50,$out
  2431. vxor $out5,$in5,$twk5
  2432. addi $out,$out,0x60
  2433. mtctr $rounds
  2434. beq Loop_xts_enc6x # did $len-=96 borrow?
  2435. addic. $len,$len,0x60
  2436. beq Lxts_enc6x_zero
  2437. cmpwi $len,0x20
  2438. blt Lxts_enc6x_one
  2439. nop
  2440. beq Lxts_enc6x_two
  2441. cmpwi $len,0x40
  2442. blt Lxts_enc6x_three
  2443. nop
  2444. beq Lxts_enc6x_four
  2445. Lxts_enc6x_five:
  2446. vxor $out0,$in1,$twk0
  2447. vxor $out1,$in2,$twk1
  2448. vxor $out2,$in3,$twk2
  2449. vxor $out3,$in4,$twk3
  2450. vxor $out4,$in5,$twk4
  2451. bl _aesp8_xts_enc5x
  2452. le?vperm $out0,$out0,$out0,$leperm
  2453. vmr $twk0,$twk5 # unused tweak
  2454. le?vperm $out1,$out1,$out1,$leperm
  2455. stvx_u $out0,$x00,$out # store output
  2456. le?vperm $out2,$out2,$out2,$leperm
  2457. stvx_u $out1,$x10,$out
  2458. le?vperm $out3,$out3,$out3,$leperm
  2459. stvx_u $out2,$x20,$out
  2460. vxor $tmp,$out4,$twk5 # last block prep for stealing
  2461. le?vperm $out4,$out4,$out4,$leperm
  2462. stvx_u $out3,$x30,$out
  2463. stvx_u $out4,$x40,$out
  2464. addi $out,$out,0x50
  2465. bne Lxts_enc6x_steal
  2466. b Lxts_enc6x_done
  2467. .align 4
  2468. Lxts_enc6x_four:
  2469. vxor $out0,$in2,$twk0
  2470. vxor $out1,$in3,$twk1
  2471. vxor $out2,$in4,$twk2
  2472. vxor $out3,$in5,$twk3
  2473. vxor $out4,$out4,$out4
  2474. bl _aesp8_xts_enc5x
  2475. le?vperm $out0,$out0,$out0,$leperm
  2476. vmr $twk0,$twk4 # unused tweak
  2477. le?vperm $out1,$out1,$out1,$leperm
  2478. stvx_u $out0,$x00,$out # store output
  2479. le?vperm $out2,$out2,$out2,$leperm
  2480. stvx_u $out1,$x10,$out
  2481. vxor $tmp,$out3,$twk4 # last block prep for stealing
  2482. le?vperm $out3,$out3,$out3,$leperm
  2483. stvx_u $out2,$x20,$out
  2484. stvx_u $out3,$x30,$out
  2485. addi $out,$out,0x40
  2486. bne Lxts_enc6x_steal
  2487. b Lxts_enc6x_done
  2488. .align 4
  2489. Lxts_enc6x_three:
  2490. vxor $out0,$in3,$twk0
  2491. vxor $out1,$in4,$twk1
  2492. vxor $out2,$in5,$twk2
  2493. vxor $out3,$out3,$out3
  2494. vxor $out4,$out4,$out4
  2495. bl _aesp8_xts_enc5x
  2496. le?vperm $out0,$out0,$out0,$leperm
  2497. vmr $twk0,$twk3 # unused tweak
  2498. le?vperm $out1,$out1,$out1,$leperm
  2499. stvx_u $out0,$x00,$out # store output
  2500. vxor $tmp,$out2,$twk3 # last block prep for stealing
  2501. le?vperm $out2,$out2,$out2,$leperm
  2502. stvx_u $out1,$x10,$out
  2503. stvx_u $out2,$x20,$out
  2504. addi $out,$out,0x30
  2505. bne Lxts_enc6x_steal
  2506. b Lxts_enc6x_done
  2507. .align 4
  2508. Lxts_enc6x_two:
  2509. vxor $out0,$in4,$twk0
  2510. vxor $out1,$in5,$twk1
  2511. vxor $out2,$out2,$out2
  2512. vxor $out3,$out3,$out3
  2513. vxor $out4,$out4,$out4
  2514. bl _aesp8_xts_enc5x
  2515. le?vperm $out0,$out0,$out0,$leperm
  2516. vmr $twk0,$twk2 # unused tweak
  2517. vxor $tmp,$out1,$twk2 # last block prep for stealing
  2518. le?vperm $out1,$out1,$out1,$leperm
  2519. stvx_u $out0,$x00,$out # store output
  2520. stvx_u $out1,$x10,$out
  2521. addi $out,$out,0x20
  2522. bne Lxts_enc6x_steal
  2523. b Lxts_enc6x_done
  2524. .align 4
  2525. Lxts_enc6x_one:
  2526. vxor $out0,$in5,$twk0
  2527. nop
  2528. Loop_xts_enc1x:
  2529. vcipher $out0,$out0,v24
  2530. lvx v24,$x20,$key_ # round[3]
  2531. addi $key_,$key_,0x20
  2532. vcipher $out0,$out0,v25
  2533. lvx v25,$x10,$key_ # round[4]
  2534. bdnz Loop_xts_enc1x
  2535. add $inp,$inp,$taillen
  2536. cmpwi $taillen,0
  2537. vcipher $out0,$out0,v24
  2538. subi $inp,$inp,16
  2539. vcipher $out0,$out0,v25
  2540. lvsr $inpperm,0,$taillen
  2541. vcipher $out0,$out0,v26
  2542. lvx_u $in0,0,$inp
  2543. vcipher $out0,$out0,v27
  2544. addi $key_,$sp,$FRAME+15 # rewind $key_
  2545. vcipher $out0,$out0,v28
  2546. lvx v24,$x00,$key_ # re-pre-load round[1]
  2547. vcipher $out0,$out0,v29
  2548. lvx v25,$x10,$key_ # re-pre-load round[2]
  2549. vxor $twk0,$twk0,v31
  2550. le?vperm $in0,$in0,$in0,$leperm
  2551. vcipher $out0,$out0,v30
  2552. vperm $in0,$in0,$in0,$inpperm
  2553. vcipherlast $out0,$out0,$twk0
  2554. vmr $twk0,$twk1 # unused tweak
  2555. vxor $tmp,$out0,$twk1 # last block prep for stealing
  2556. le?vperm $out0,$out0,$out0,$leperm
  2557. stvx_u $out0,$x00,$out # store output
  2558. addi $out,$out,0x10
  2559. bne Lxts_enc6x_steal
  2560. b Lxts_enc6x_done
  2561. .align 4
  2562. Lxts_enc6x_zero:
  2563. cmpwi $taillen,0
  2564. beq Lxts_enc6x_done
  2565. add $inp,$inp,$taillen
  2566. subi $inp,$inp,16
  2567. lvx_u $in0,0,$inp
  2568. lvsr $inpperm,0,$taillen # $in5 is no more
  2569. le?vperm $in0,$in0,$in0,$leperm
  2570. vperm $in0,$in0,$in0,$inpperm
  2571. vxor $tmp,$tmp,$twk0
  2572. Lxts_enc6x_steal:
  2573. vxor $in0,$in0,$twk0
  2574. vxor $out0,$out0,$out0
  2575. vspltisb $out1,-1
  2576. vperm $out0,$out0,$out1,$inpperm
  2577. vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
  2578. subi r30,$out,17
  2579. subi $out,$out,16
  2580. mtctr $taillen
  2581. Loop_xts_enc6x_steal:
  2582. lbzu r0,1(r30)
  2583. stb r0,16(r30)
  2584. bdnz Loop_xts_enc6x_steal
  2585. li $taillen,0
  2586. mtctr $rounds
  2587. b Loop_xts_enc1x # one more time...
  2588. .align 4
  2589. Lxts_enc6x_done:
  2590. ${UCMP}i $ivp,0
  2591. beq Lxts_enc6x_ret
  2592. vxor $tweak,$twk0,$rndkey0
  2593. le?vperm $tweak,$tweak,$tweak,$leperm
  2594. stvx_u $tweak,0,$ivp
  2595. Lxts_enc6x_ret:
  2596. mtlr r11
  2597. li r10,`$FRAME+15`
  2598. li r11,`$FRAME+31`
  2599. stvx $seven,r10,$sp # wipe copies of round keys
  2600. addi r10,r10,32
  2601. stvx $seven,r11,$sp
  2602. addi r11,r11,32
  2603. stvx $seven,r10,$sp
  2604. addi r10,r10,32
  2605. stvx $seven,r11,$sp
  2606. addi r11,r11,32
  2607. stvx $seven,r10,$sp
  2608. addi r10,r10,32
  2609. stvx $seven,r11,$sp
  2610. addi r11,r11,32
  2611. stvx $seven,r10,$sp
  2612. addi r10,r10,32
  2613. stvx $seven,r11,$sp
  2614. addi r11,r11,32
  2615. mtspr 256,$vrsave
  2616. lvx v20,r10,$sp # ABI says so
  2617. addi r10,r10,32
  2618. lvx v21,r11,$sp
  2619. addi r11,r11,32
  2620. lvx v22,r10,$sp
  2621. addi r10,r10,32
  2622. lvx v23,r11,$sp
  2623. addi r11,r11,32
  2624. lvx v24,r10,$sp
  2625. addi r10,r10,32
  2626. lvx v25,r11,$sp
  2627. addi r11,r11,32
  2628. lvx v26,r10,$sp
  2629. addi r10,r10,32
  2630. lvx v27,r11,$sp
  2631. addi r11,r11,32
  2632. lvx v28,r10,$sp
  2633. addi r10,r10,32
  2634. lvx v29,r11,$sp
  2635. addi r11,r11,32
  2636. lvx v30,r10,$sp
  2637. lvx v31,r11,$sp
  2638. $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2639. $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2640. $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2641. $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2642. $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2643. $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2644. addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
  2645. blr
  2646. .long 0
  2647. .byte 0,12,0x04,1,0x80,6,6,0
  2648. .long 0
  2649. .align 5
  2650. _aesp8_xts_enc5x:
  2651. vcipher $out0,$out0,v24
  2652. vcipher $out1,$out1,v24
  2653. vcipher $out2,$out2,v24
  2654. vcipher $out3,$out3,v24
  2655. vcipher $out4,$out4,v24
  2656. lvx v24,$x20,$key_ # round[3]
  2657. addi $key_,$key_,0x20
  2658. vcipher $out0,$out0,v25
  2659. vcipher $out1,$out1,v25
  2660. vcipher $out2,$out2,v25
  2661. vcipher $out3,$out3,v25
  2662. vcipher $out4,$out4,v25
  2663. lvx v25,$x10,$key_ # round[4]
  2664. bdnz _aesp8_xts_enc5x
  2665. add $inp,$inp,$taillen
  2666. cmpwi $taillen,0
  2667. vcipher $out0,$out0,v24
  2668. vcipher $out1,$out1,v24
  2669. vcipher $out2,$out2,v24
  2670. vcipher $out3,$out3,v24
  2671. vcipher $out4,$out4,v24
  2672. subi $inp,$inp,16
  2673. vcipher $out0,$out0,v25
  2674. vcipher $out1,$out1,v25
  2675. vcipher $out2,$out2,v25
  2676. vcipher $out3,$out3,v25
  2677. vcipher $out4,$out4,v25
  2678. vxor $twk0,$twk0,v31
  2679. vcipher $out0,$out0,v26
  2680. lvsr $inpperm,r0,$taillen # $in5 is no more
  2681. vcipher $out1,$out1,v26
  2682. vcipher $out2,$out2,v26
  2683. vcipher $out3,$out3,v26
  2684. vcipher $out4,$out4,v26
  2685. vxor $in1,$twk1,v31
  2686. vcipher $out0,$out0,v27
  2687. lvx_u $in0,0,$inp
  2688. vcipher $out1,$out1,v27
  2689. vcipher $out2,$out2,v27
  2690. vcipher $out3,$out3,v27
  2691. vcipher $out4,$out4,v27
  2692. vxor $in2,$twk2,v31
  2693. addi $key_,$sp,$FRAME+15 # rewind $key_
  2694. vcipher $out0,$out0,v28
  2695. vcipher $out1,$out1,v28
  2696. vcipher $out2,$out2,v28
  2697. vcipher $out3,$out3,v28
  2698. vcipher $out4,$out4,v28
  2699. lvx v24,$x00,$key_ # re-pre-load round[1]
  2700. vxor $in3,$twk3,v31
  2701. vcipher $out0,$out0,v29
  2702. le?vperm $in0,$in0,$in0,$leperm
  2703. vcipher $out1,$out1,v29
  2704. vcipher $out2,$out2,v29
  2705. vcipher $out3,$out3,v29
  2706. vcipher $out4,$out4,v29
  2707. lvx v25,$x10,$key_ # re-pre-load round[2]
  2708. vxor $in4,$twk4,v31
  2709. vcipher $out0,$out0,v30
  2710. vperm $in0,$in0,$in0,$inpperm
  2711. vcipher $out1,$out1,v30
  2712. vcipher $out2,$out2,v30
  2713. vcipher $out3,$out3,v30
  2714. vcipher $out4,$out4,v30
  2715. vcipherlast $out0,$out0,$twk0
  2716. vcipherlast $out1,$out1,$in1
  2717. vcipherlast $out2,$out2,$in2
  2718. vcipherlast $out3,$out3,$in3
  2719. vcipherlast $out4,$out4,$in4
  2720. blr
  2721. .long 0
  2722. .byte 0,12,0x14,0,0,0,0,0
  2723. .align 5
  2724. _aesp8_xts_decrypt6x:
  2725. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  2726. mflr r11
  2727. li r7,`$FRAME+8*16+15`
  2728. li r3,`$FRAME+8*16+31`
  2729. $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
  2730. stvx v20,r7,$sp # ABI says so
  2731. addi r7,r7,32
  2732. stvx v21,r3,$sp
  2733. addi r3,r3,32
  2734. stvx v22,r7,$sp
  2735. addi r7,r7,32
  2736. stvx v23,r3,$sp
  2737. addi r3,r3,32
  2738. stvx v24,r7,$sp
  2739. addi r7,r7,32
  2740. stvx v25,r3,$sp
  2741. addi r3,r3,32
  2742. stvx v26,r7,$sp
  2743. addi r7,r7,32
  2744. stvx v27,r3,$sp
  2745. addi r3,r3,32
  2746. stvx v28,r7,$sp
  2747. addi r7,r7,32
  2748. stvx v29,r3,$sp
  2749. addi r3,r3,32
  2750. stvx v30,r7,$sp
  2751. stvx v31,r3,$sp
  2752. li r0,-1
  2753. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  2754. li $x10,0x10
  2755. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2756. li $x20,0x20
  2757. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2758. li $x30,0x30
  2759. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2760. li $x40,0x40
  2761. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2762. li $x50,0x50
  2763. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2764. li $x60,0x60
  2765. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2766. li $x70,0x70
  2767. mtspr 256,r0
  2768. subi $rounds,$rounds,3 # -4 in total
  2769. lvx $rndkey0,$x00,$key1 # load key schedule
  2770. lvx v30,$x10,$key1
  2771. addi $key1,$key1,0x20
  2772. lvx v31,$x00,$key1
  2773. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  2774. addi $key_,$sp,$FRAME+15
  2775. mtctr $rounds
  2776. Load_xts_dec_key:
  2777. ?vperm v24,v30,v31,$keyperm
  2778. lvx v30,$x10,$key1
  2779. addi $key1,$key1,0x20
  2780. stvx v24,$x00,$key_ # off-load round[1]
  2781. ?vperm v25,v31,v30,$keyperm
  2782. lvx v31,$x00,$key1
  2783. stvx v25,$x10,$key_ # off-load round[2]
  2784. addi $key_,$key_,0x20
  2785. bdnz Load_xts_dec_key
  2786. lvx v26,$x10,$key1
  2787. ?vperm v24,v30,v31,$keyperm
  2788. lvx v27,$x20,$key1
  2789. stvx v24,$x00,$key_ # off-load round[3]
  2790. ?vperm v25,v31,v26,$keyperm
  2791. lvx v28,$x30,$key1
  2792. stvx v25,$x10,$key_ # off-load round[4]
  2793. addi $key_,$sp,$FRAME+15 # rewind $key_
  2794. ?vperm v26,v26,v27,$keyperm
  2795. lvx v29,$x40,$key1
  2796. ?vperm v27,v27,v28,$keyperm
  2797. lvx v30,$x50,$key1
  2798. ?vperm v28,v28,v29,$keyperm
  2799. lvx v31,$x60,$key1
  2800. ?vperm v29,v29,v30,$keyperm
  2801. lvx $twk5,$x70,$key1 # borrow $twk5
  2802. ?vperm v30,v30,v31,$keyperm
  2803. lvx v24,$x00,$key_ # pre-load round[1]
  2804. ?vperm v31,v31,$twk5,$keyperm
  2805. lvx v25,$x10,$key_ # pre-load round[2]
  2806. vperm $in0,$inout,$inptail,$inpperm
  2807. subi $inp,$inp,31 # undo "caller"
  2808. vxor $twk0,$tweak,$rndkey0
  2809. vsrab $tmp,$tweak,$seven # next tweak value
  2810. vaddubm $tweak,$tweak,$tweak
  2811. vsldoi $tmp,$tmp,$tmp,15
  2812. vand $tmp,$tmp,$eighty7
  2813. vxor $out0,$in0,$twk0
  2814. vxor $tweak,$tweak,$tmp
  2815. lvx_u $in1,$x10,$inp
  2816. vxor $twk1,$tweak,$rndkey0
  2817. vsrab $tmp,$tweak,$seven # next tweak value
  2818. vaddubm $tweak,$tweak,$tweak
  2819. vsldoi $tmp,$tmp,$tmp,15
  2820. le?vperm $in1,$in1,$in1,$leperm
  2821. vand $tmp,$tmp,$eighty7
  2822. vxor $out1,$in1,$twk1
  2823. vxor $tweak,$tweak,$tmp
  2824. lvx_u $in2,$x20,$inp
  2825. andi. $taillen,$len,15
  2826. vxor $twk2,$tweak,$rndkey0
  2827. vsrab $tmp,$tweak,$seven # next tweak value
  2828. vaddubm $tweak,$tweak,$tweak
  2829. vsldoi $tmp,$tmp,$tmp,15
  2830. le?vperm $in2,$in2,$in2,$leperm
  2831. vand $tmp,$tmp,$eighty7
  2832. vxor $out2,$in2,$twk2
  2833. vxor $tweak,$tweak,$tmp
  2834. lvx_u $in3,$x30,$inp
  2835. sub $len,$len,$taillen
  2836. vxor $twk3,$tweak,$rndkey0
  2837. vsrab $tmp,$tweak,$seven # next tweak value
  2838. vaddubm $tweak,$tweak,$tweak
  2839. vsldoi $tmp,$tmp,$tmp,15
  2840. le?vperm $in3,$in3,$in3,$leperm
  2841. vand $tmp,$tmp,$eighty7
  2842. vxor $out3,$in3,$twk3
  2843. vxor $tweak,$tweak,$tmp
  2844. lvx_u $in4,$x40,$inp
  2845. subi $len,$len,0x60
  2846. vxor $twk4,$tweak,$rndkey0
  2847. vsrab $tmp,$tweak,$seven # next tweak value
  2848. vaddubm $tweak,$tweak,$tweak
  2849. vsldoi $tmp,$tmp,$tmp,15
  2850. le?vperm $in4,$in4,$in4,$leperm
  2851. vand $tmp,$tmp,$eighty7
  2852. vxor $out4,$in4,$twk4
  2853. vxor $tweak,$tweak,$tmp
  2854. lvx_u $in5,$x50,$inp
  2855. addi $inp,$inp,0x60
  2856. vxor $twk5,$tweak,$rndkey0
  2857. vsrab $tmp,$tweak,$seven # next tweak value
  2858. vaddubm $tweak,$tweak,$tweak
  2859. vsldoi $tmp,$tmp,$tmp,15
  2860. le?vperm $in5,$in5,$in5,$leperm
  2861. vand $tmp,$tmp,$eighty7
  2862. vxor $out5,$in5,$twk5
  2863. vxor $tweak,$tweak,$tmp
  2864. vxor v31,v31,$rndkey0
  2865. mtctr $rounds
  2866. b Loop_xts_dec6x
  2867. .align 5
  2868. Loop_xts_dec6x:
  2869. vncipher $out0,$out0,v24
  2870. vncipher $out1,$out1,v24
  2871. vncipher $out2,$out2,v24
  2872. vncipher $out3,$out3,v24
  2873. vncipher $out4,$out4,v24
  2874. vncipher $out5,$out5,v24
  2875. lvx v24,$x20,$key_ # round[3]
  2876. addi $key_,$key_,0x20
  2877. vncipher $out0,$out0,v25
  2878. vncipher $out1,$out1,v25
  2879. vncipher $out2,$out2,v25
  2880. vncipher $out3,$out3,v25
  2881. vncipher $out4,$out4,v25
  2882. vncipher $out5,$out5,v25
  2883. lvx v25,$x10,$key_ # round[4]
  2884. bdnz Loop_xts_dec6x
  2885. subic $len,$len,96 # $len-=96
  2886. vxor $in0,$twk0,v31 # xor with last round key
  2887. vncipher $out0,$out0,v24
  2888. vncipher $out1,$out1,v24
  2889. vsrab $tmp,$tweak,$seven # next tweak value
  2890. vxor $twk0,$tweak,$rndkey0
  2891. vaddubm $tweak,$tweak,$tweak
  2892. vncipher $out2,$out2,v24
  2893. vncipher $out3,$out3,v24
  2894. vsldoi $tmp,$tmp,$tmp,15
  2895. vncipher $out4,$out4,v24
  2896. vncipher $out5,$out5,v24
  2897. subfe. r0,r0,r0 # borrow?-1:0
  2898. vand $tmp,$tmp,$eighty7
  2899. vncipher $out0,$out0,v25
  2900. vncipher $out1,$out1,v25
  2901. vxor $tweak,$tweak,$tmp
  2902. vncipher $out2,$out2,v25
  2903. vncipher $out3,$out3,v25
  2904. vxor $in1,$twk1,v31
  2905. vsrab $tmp,$tweak,$seven # next tweak value
  2906. vxor $twk1,$tweak,$rndkey0
  2907. vncipher $out4,$out4,v25
  2908. vncipher $out5,$out5,v25
  2909. and r0,r0,$len
  2910. vaddubm $tweak,$tweak,$tweak
  2911. vsldoi $tmp,$tmp,$tmp,15
  2912. vncipher $out0,$out0,v26
  2913. vncipher $out1,$out1,v26
  2914. vand $tmp,$tmp,$eighty7
  2915. vncipher $out2,$out2,v26
  2916. vncipher $out3,$out3,v26
  2917. vxor $tweak,$tweak,$tmp
  2918. vncipher $out4,$out4,v26
  2919. vncipher $out5,$out5,v26
  2920. add $inp,$inp,r0 # $inp is adjusted in such
  2921. # way that at exit from the
  2922. # loop inX-in5 are loaded
  2923. # with last "words"
	vxor		$in2,$twk2,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk2,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vand		$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vxor		$tweak,$tweak,$tmp
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vxor		$in3,$twk3,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vand		$tmp,$tmp,$eighty7

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vxor		$in4,$twk4,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	vxor		$in5,$twk5,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk5,$tweak,$rndkey0

	vncipherlast	$out0,$out0,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipherlast	$out1,$out1,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u		$in2,$x20,$inp
	vand		$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u		$in4,$x40,$inp
	vxor		$tweak,$tweak,$tmp
	vncipherlast	$out5,$out5,$in5
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$twk4
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_dec6x		# did $len-=96 borrow?
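	# Fewer than 96 bytes remain: undo the bias on $len and dispatch
	# on the number of whole 16-byte blocks left (0-5); a partial
	# trailing block, if any, is picked up by Lxts_dec6x_steal below.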
	addic.		$len,$len,0x60
	beq		Lxts_dec6x_zero
	cmpwi		$len,0x20
	blt		Lxts_dec6x_one
	nop
	beq		Lxts_dec6x_two
	cmpwi		$len,0x40
	blt		Lxts_dec6x_three
	nop
	beq		Lxts_dec6x_four

Lxts_dec6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	vxor		$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vxor		$twk0,$twk0,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vxor		$twk1,$twk1,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0
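	# Ciphertext stealing: the vspltisb/vperm/vsel triple above built
	# a $taillen-byte select mask and spliced the partial trailing
	# ciphertext into the block just decrypted; the merged block,
	# masked with the last tweak, takes one more trip through
	# Loop_xts_dec1x.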
	subi		r30,$out,1
	mtctr		$taillen
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal
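	# The byte loop above copied the first $taillen bytes of the block
	# just stored at $out forward by 16 bytes - they become the short
	# final output block - before the spliced block is decrypted over
	# the block at $out.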
	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

.align	5
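# _aesp8_xts_dec5x finishes the remaining AES rounds for up to five
# streams at once, looping over the round keys staged on the stack at
# $FRAME+15; it expects CTR to hold the iteration count and leaves it
# re-primed with $rounds for the caller.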
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}
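# Post-process the accumulated $code: constant-fold `...` expressions,
# emit the constants table in the flavour's endianness, and rewrite the
# endian-specific ("le?", "be?", "?") mnemonics before printing.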
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes);    last; };
		}
	    }
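	    # e.g. on a little-endian flavour ".long 0x01020304 ?rev" is
	    # emitted as ".byte 0x04,0x03,0x02,0x01"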
	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}

	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}
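	# e.g. "le?vperm v0,v1,v2,v3" assembles as-is on little-endian
	# flavours but comes out commented ("#le#") on big-endian ones,
	# while "?lvsr" resolves to lvsl or lvsr depending on the flavour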
	print $_,"\n";
}

close STDOUT;