// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package poly1305

// Based on original, public domain implementation from NaCl by D. J.
// Bernstein.

import "math"
  8. const (
  9. alpham80 = 0.00000000558793544769287109375
  10. alpham48 = 24.0
  11. alpham16 = 103079215104.0
  12. alpha0 = 6755399441055744.0
  13. alpha18 = 1770887431076116955136.0
  14. alpha32 = 29014219670751100192948224.0
  15. alpha50 = 7605903601369376408980219232256.0
  16. alpha64 = 124615124604835863084731911901282304.0
  17. alpha82 = 32667107224410092492483962313449748299776.0
  18. alpha96 = 535217884764734955396857238543560676143529984.0
  19. alpha112 = 35076039295941670036888435985190792471742381031424.0
  20. alpha130 = 9194973245195333150150082162901855101712434733101613056.0
  21. scale = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125
  22. offset0 = 6755408030990331.0
  23. offset1 = 29014256564239239022116864.0
  24. offset2 = 124615283061160854719918951570079744.0
  25. offset3 = 535219245894202480694386063513315216128475136.0
  26. )
  27. // Sum generates an authenticator for m using a one-time key and puts the
  28. // 16-byte result into out. Authenticating two different messages with the same
  29. // key allows an attacker to forge messages at will.
  30. func Sum(out *[16]byte, m []byte, key *[32]byte) {
  31. r := key
  32. s := key[16:]
  33. var (
  34. y7 float64
  35. y6 float64
  36. y1 float64
  37. y0 float64
  38. y5 float64
  39. y4 float64
  40. x7 float64
  41. x6 float64
  42. x1 float64
  43. x0 float64
  44. y3 float64
  45. y2 float64
  46. x5 float64
  47. r3lowx0 float64
  48. x4 float64
  49. r0lowx6 float64
  50. x3 float64
  51. r3highx0 float64
  52. x2 float64
  53. r0highx6 float64
  54. r0lowx0 float64
  55. sr1lowx6 float64
  56. r0highx0 float64
  57. sr1highx6 float64
  58. sr3low float64
  59. r1lowx0 float64
  60. sr2lowx6 float64
  61. r1highx0 float64
  62. sr2highx6 float64
  63. r2lowx0 float64
  64. sr3lowx6 float64
  65. r2highx0 float64
  66. sr3highx6 float64
  67. r1highx4 float64
  68. r1lowx4 float64
  69. r0highx4 float64
  70. r0lowx4 float64
  71. sr3highx4 float64
  72. sr3lowx4 float64
  73. sr2highx4 float64
  74. sr2lowx4 float64
  75. r0lowx2 float64
  76. r0highx2 float64
  77. r1lowx2 float64
  78. r1highx2 float64
  79. r2lowx2 float64
  80. r2highx2 float64
  81. sr3lowx2 float64
  82. sr3highx2 float64
  83. z0 float64
  84. z1 float64
  85. z2 float64
  86. z3 float64
  87. m0 int64
  88. m1 int64
  89. m2 int64
  90. m3 int64
  91. m00 uint32
  92. m01 uint32
  93. m02 uint32
  94. m03 uint32
  95. m10 uint32
  96. m11 uint32
  97. m12 uint32
  98. m13 uint32
  99. m20 uint32
  100. m21 uint32
  101. m22 uint32
  102. m23 uint32
  103. m30 uint32
  104. m31 uint32
  105. m32 uint32
  106. m33 uint64
  107. lbelow2 int32
  108. lbelow3 int32
  109. lbelow4 int32
  110. lbelow5 int32
  111. lbelow6 int32
  112. lbelow7 int32
  113. lbelow8 int32
  114. lbelow9 int32
  115. lbelow10 int32
  116. lbelow11 int32
  117. lbelow12 int32
  118. lbelow13 int32
  119. lbelow14 int32
  120. lbelow15 int32
  121. s00 uint32
  122. s01 uint32
  123. s02 uint32
  124. s03 uint32
  125. s10 uint32
  126. s11 uint32
  127. s12 uint32
  128. s13 uint32
  129. s20 uint32
  130. s21 uint32
  131. s22 uint32
  132. s23 uint32
  133. s30 uint32
  134. s31 uint32
  135. s32 uint32
  136. s33 uint32
  137. bits32 uint64
  138. f uint64
  139. f0 uint64
  140. f1 uint64
  141. f2 uint64
  142. f3 uint64
  143. f4 uint64
  144. g uint64
  145. g0 uint64
  146. g1 uint64
  147. g2 uint64
  148. g3 uint64
  149. g4 uint64
  150. )
  151. var p int32
  152. l := int32(len(m))
  153. r00 := uint32(r[0])
  154. r01 := uint32(r[1])
  155. r02 := uint32(r[2])
  156. r0 := int64(2151)
  157. r03 := uint32(r[3])
  158. r03 &= 15
  159. r0 <<= 51
  160. r10 := uint32(r[4])
  161. r10 &= 252
  162. r01 <<= 8
  163. r0 += int64(r00)
  164. r11 := uint32(r[5])
  165. r02 <<= 16
  166. r0 += int64(r01)
  167. r12 := uint32(r[6])
  168. r03 <<= 24
  169. r0 += int64(r02)
  170. r13 := uint32(r[7])
  171. r13 &= 15
  172. r1 := int64(2215)
  173. r0 += int64(r03)
  174. d0 := r0
  175. r1 <<= 51
  176. r2 := int64(2279)
  177. r20 := uint32(r[8])
  178. r20 &= 252
  179. r11 <<= 8
  180. r1 += int64(r10)
  181. r21 := uint32(r[9])
  182. r12 <<= 16
  183. r1 += int64(r11)
  184. r22 := uint32(r[10])
  185. r13 <<= 24
  186. r1 += int64(r12)
  187. r23 := uint32(r[11])
  188. r23 &= 15
  189. r2 <<= 51
  190. r1 += int64(r13)
  191. d1 := r1
  192. r21 <<= 8
  193. r2 += int64(r20)
  194. r30 := uint32(r[12])
  195. r30 &= 252
  196. r22 <<= 16
  197. r2 += int64(r21)
  198. r31 := uint32(r[13])
  199. r23 <<= 24
  200. r2 += int64(r22)
  201. r32 := uint32(r[14])
  202. r2 += int64(r23)
  203. r3 := int64(2343)
  204. d2 := r2
  205. r3 <<= 51
  206. r33 := uint32(r[15])
  207. r33 &= 15
  208. r31 <<= 8
  209. r3 += int64(r30)
  210. r32 <<= 16
  211. r3 += int64(r31)
  212. r33 <<= 24
  213. r3 += int64(r32)
  214. r3 += int64(r33)
  215. h0 := alpha32 - alpha32
  216. d3 := r3
  217. h1 := alpha32 - alpha32
  218. h2 := alpha32 - alpha32
  219. h3 := alpha32 - alpha32
  220. h4 := alpha32 - alpha32
  221. r0low := math.Float64frombits(uint64(d0))
  222. h5 := alpha32 - alpha32
  223. r1low := math.Float64frombits(uint64(d1))
  224. h6 := alpha32 - alpha32
  225. r2low := math.Float64frombits(uint64(d2))
  226. h7 := alpha32 - alpha32
  227. r0low -= alpha0
  228. r1low -= alpha32
  229. r2low -= alpha64
  230. r0high := r0low + alpha18
  231. r3low := math.Float64frombits(uint64(d3))
  232. r1high := r1low + alpha50
  233. sr1low := scale * r1low
  234. r2high := r2low + alpha82
  235. sr2low := scale * r2low
  236. r0high -= alpha18
  237. r0high_stack := r0high
  238. r3low -= alpha96
  239. r1high -= alpha50
  240. r1high_stack := r1high
  241. sr1high := sr1low + alpham80
  242. r0low -= r0high
  243. r2high -= alpha82
  244. sr3low = scale * r3low
  245. sr2high := sr2low + alpham48
  246. r1low -= r1high
  247. r1low_stack := r1low
  248. sr1high -= alpham80
  249. sr1high_stack := sr1high
  250. r2low -= r2high
  251. r2low_stack := r2low
  252. sr2high -= alpham48
  253. sr2high_stack := sr2high
  254. r3high := r3low + alpha112
  255. r0low_stack := r0low
  256. sr1low -= sr1high
  257. sr1low_stack := sr1low
  258. sr3high := sr3low + alpham16
  259. r2high_stack := r2high
  260. sr2low -= sr2high
  261. sr2low_stack := sr2low
  262. r3high -= alpha112
  263. r3high_stack := r3high
  264. sr3high -= alpham16
  265. sr3high_stack := sr3high
  266. r3low -= r3high
  267. r3low_stack := r3low
  268. sr3low -= sr3high
  269. sr3low_stack := sr3low
  270. if l < 16 {
  271. goto addatmost15bytes
  272. }
  273. m00 = uint32(m[p+0])
  274. m0 = 2151
  275. m0 <<= 51
  276. m1 = 2215
  277. m01 = uint32(m[p+1])
  278. m1 <<= 51
  279. m2 = 2279
  280. m02 = uint32(m[p+2])
  281. m2 <<= 51
  282. m3 = 2343
  283. m03 = uint32(m[p+3])
  284. m10 = uint32(m[p+4])
  285. m01 <<= 8
  286. m0 += int64(m00)
  287. m11 = uint32(m[p+5])
  288. m02 <<= 16
  289. m0 += int64(m01)
  290. m12 = uint32(m[p+6])
  291. m03 <<= 24
  292. m0 += int64(m02)
  293. m13 = uint32(m[p+7])
  294. m3 <<= 51
  295. m0 += int64(m03)
  296. m20 = uint32(m[p+8])
  297. m11 <<= 8
  298. m1 += int64(m10)
  299. m21 = uint32(m[p+9])
  300. m12 <<= 16
  301. m1 += int64(m11)
  302. m22 = uint32(m[p+10])
  303. m13 <<= 24
  304. m1 += int64(m12)
  305. m23 = uint32(m[p+11])
  306. m1 += int64(m13)
  307. m30 = uint32(m[p+12])
  308. m21 <<= 8
  309. m2 += int64(m20)
  310. m31 = uint32(m[p+13])
  311. m22 <<= 16
  312. m2 += int64(m21)
  313. m32 = uint32(m[p+14])
  314. m23 <<= 24
  315. m2 += int64(m22)
  316. m33 = uint64(m[p+15])
  317. m2 += int64(m23)
  318. d0 = m0
  319. m31 <<= 8
  320. m3 += int64(m30)
  321. d1 = m1
  322. m32 <<= 16
  323. m3 += int64(m31)
  324. d2 = m2
  325. m33 += 256
  326. m33 <<= 24
  327. m3 += int64(m32)
  328. m3 += int64(m33)
  329. d3 = m3
  330. p += 16
  331. l -= 16
  332. z0 = math.Float64frombits(uint64(d0))
  333. z1 = math.Float64frombits(uint64(d1))
  334. z2 = math.Float64frombits(uint64(d2))
  335. z3 = math.Float64frombits(uint64(d3))
  336. z0 -= alpha0
  337. z1 -= alpha32
  338. z2 -= alpha64
  339. z3 -= alpha96
  340. h0 += z0
  341. h1 += z1
  342. h3 += z2
  343. h5 += z3
  344. if l < 16 {
  345. goto multiplyaddatmost15bytes
  346. }
  347. multiplyaddatleast16bytes:
  348. m2 = 2279
  349. m20 = uint32(m[p+8])
  350. y7 = h7 + alpha130
  351. m2 <<= 51
  352. m3 = 2343
  353. m21 = uint32(m[p+9])
  354. y6 = h6 + alpha130
  355. m3 <<= 51
  356. m0 = 2151
  357. m22 = uint32(m[p+10])
  358. y1 = h1 + alpha32
  359. m0 <<= 51
  360. m1 = 2215
  361. m23 = uint32(m[p+11])
  362. y0 = h0 + alpha32
  363. m1 <<= 51
  364. m30 = uint32(m[p+12])
  365. y7 -= alpha130
  366. m21 <<= 8
  367. m2 += int64(m20)
  368. m31 = uint32(m[p+13])
  369. y6 -= alpha130
  370. m22 <<= 16
  371. m2 += int64(m21)
  372. m32 = uint32(m[p+14])
  373. y1 -= alpha32
  374. m23 <<= 24
  375. m2 += int64(m22)
  376. m33 = uint64(m[p+15])
  377. y0 -= alpha32
  378. m2 += int64(m23)
  379. m00 = uint32(m[p+0])
  380. y5 = h5 + alpha96
  381. m31 <<= 8
  382. m3 += int64(m30)
  383. m01 = uint32(m[p+1])
  384. y4 = h4 + alpha96
  385. m32 <<= 16
  386. m02 = uint32(m[p+2])
  387. x7 = h7 - y7
  388. y7 *= scale
  389. m33 += 256
  390. m03 = uint32(m[p+3])
  391. x6 = h6 - y6
  392. y6 *= scale
  393. m33 <<= 24
  394. m3 += int64(m31)
  395. m10 = uint32(m[p+4])
  396. x1 = h1 - y1
  397. m01 <<= 8
  398. m3 += int64(m32)
  399. m11 = uint32(m[p+5])
  400. x0 = h0 - y0
  401. m3 += int64(m33)
  402. m0 += int64(m00)
  403. m12 = uint32(m[p+6])
  404. y5 -= alpha96
  405. m02 <<= 16
  406. m0 += int64(m01)
  407. m13 = uint32(m[p+7])
  408. y4 -= alpha96
  409. m03 <<= 24
  410. m0 += int64(m02)
  411. d2 = m2
  412. x1 += y7
  413. m0 += int64(m03)
  414. d3 = m3
  415. x0 += y6
  416. m11 <<= 8
  417. m1 += int64(m10)
  418. d0 = m0
  419. x7 += y5
  420. m12 <<= 16
  421. m1 += int64(m11)
  422. x6 += y4
  423. m13 <<= 24
  424. m1 += int64(m12)
  425. y3 = h3 + alpha64
  426. m1 += int64(m13)
  427. d1 = m1
  428. y2 = h2 + alpha64
  429. x0 += x1
  430. x6 += x7
  431. y3 -= alpha64
  432. r3low = r3low_stack
  433. y2 -= alpha64
  434. r0low = r0low_stack
  435. x5 = h5 - y5
  436. r3lowx0 = r3low * x0
  437. r3high = r3high_stack
  438. x4 = h4 - y4
  439. r0lowx6 = r0low * x6
  440. r0high = r0high_stack
  441. x3 = h3 - y3
  442. r3highx0 = r3high * x0
  443. sr1low = sr1low_stack
  444. x2 = h2 - y2
  445. r0highx6 = r0high * x6
  446. sr1high = sr1high_stack
  447. x5 += y3
  448. r0lowx0 = r0low * x0
  449. r1low = r1low_stack
  450. h6 = r3lowx0 + r0lowx6
  451. sr1lowx6 = sr1low * x6
  452. r1high = r1high_stack
  453. x4 += y2
  454. r0highx0 = r0high * x0
  455. sr2low = sr2low_stack
  456. h7 = r3highx0 + r0highx6
  457. sr1highx6 = sr1high * x6
  458. sr2high = sr2high_stack
  459. x3 += y1
  460. r1lowx0 = r1low * x0
  461. r2low = r2low_stack
  462. h0 = r0lowx0 + sr1lowx6
  463. sr2lowx6 = sr2low * x6
  464. r2high = r2high_stack
  465. x2 += y0
  466. r1highx0 = r1high * x0
  467. sr3low = sr3low_stack
  468. h1 = r0highx0 + sr1highx6
  469. sr2highx6 = sr2high * x6
  470. sr3high = sr3high_stack
  471. x4 += x5
  472. r2lowx0 = r2low * x0
  473. z2 = math.Float64frombits(uint64(d2))
  474. h2 = r1lowx0 + sr2lowx6
  475. sr3lowx6 = sr3low * x6
  476. x2 += x3
  477. r2highx0 = r2high * x0
  478. z3 = math.Float64frombits(uint64(d3))
  479. h3 = r1highx0 + sr2highx6
  480. sr3highx6 = sr3high * x6
  481. r1highx4 = r1high * x4
  482. z2 -= alpha64
  483. h4 = r2lowx0 + sr3lowx6
  484. r1lowx4 = r1low * x4
  485. r0highx4 = r0high * x4
  486. z3 -= alpha96
  487. h5 = r2highx0 + sr3highx6
  488. r0lowx4 = r0low * x4
  489. h7 += r1highx4
  490. sr3highx4 = sr3high * x4
  491. h6 += r1lowx4
  492. sr3lowx4 = sr3low * x4
  493. h5 += r0highx4
  494. sr2highx4 = sr2high * x4
  495. h4 += r0lowx4
  496. sr2lowx4 = sr2low * x4
  497. h3 += sr3highx4
  498. r0lowx2 = r0low * x2
  499. h2 += sr3lowx4
  500. r0highx2 = r0high * x2
  501. h1 += sr2highx4
  502. r1lowx2 = r1low * x2
  503. h0 += sr2lowx4
  504. r1highx2 = r1high * x2
  505. h2 += r0lowx2
  506. r2lowx2 = r2low * x2
  507. h3 += r0highx2
  508. r2highx2 = r2high * x2
  509. h4 += r1lowx2
  510. sr3lowx2 = sr3low * x2
  511. h5 += r1highx2
  512. sr3highx2 = sr3high * x2
  513. p += 16
  514. l -= 16
  515. h6 += r2lowx2
  516. h7 += r2highx2
  517. z1 = math.Float64frombits(uint64(d1))
  518. h0 += sr3lowx2
  519. z0 = math.Float64frombits(uint64(d0))
  520. h1 += sr3highx2
  521. z1 -= alpha32
  522. z0 -= alpha0
  523. h5 += z3
  524. h3 += z2
  525. h1 += z1
  526. h0 += z0
  527. if l >= 16 {
  528. goto multiplyaddatleast16bytes
  529. }
  530. multiplyaddatmost15bytes:
  531. y7 = h7 + alpha130
  532. y6 = h6 + alpha130
  533. y1 = h1 + alpha32
  534. y0 = h0 + alpha32
  535. y7 -= alpha130
  536. y6 -= alpha130
  537. y1 -= alpha32
  538. y0 -= alpha32
  539. y5 = h5 + alpha96
  540. y4 = h4 + alpha96
  541. x7 = h7 - y7
  542. y7 *= scale
  543. x6 = h6 - y6
  544. y6 *= scale
  545. x1 = h1 - y1
  546. x0 = h0 - y0
  547. y5 -= alpha96
  548. y4 -= alpha96
  549. x1 += y7
  550. x0 += y6
  551. x7 += y5
  552. x6 += y4
  553. y3 = h3 + alpha64
  554. y2 = h2 + alpha64
  555. x0 += x1
  556. x6 += x7
  557. y3 -= alpha64
  558. r3low = r3low_stack
  559. y2 -= alpha64
  560. r0low = r0low_stack
  561. x5 = h5 - y5
  562. r3lowx0 = r3low * x0
  563. r3high = r3high_stack
  564. x4 = h4 - y4
  565. r0lowx6 = r0low * x6
  566. r0high = r0high_stack
  567. x3 = h3 - y3
  568. r3highx0 = r3high * x0
  569. sr1low = sr1low_stack
  570. x2 = h2 - y2
  571. r0highx6 = r0high * x6
  572. sr1high = sr1high_stack
  573. x5 += y3
  574. r0lowx0 = r0low * x0
  575. r1low = r1low_stack
  576. h6 = r3lowx0 + r0lowx6
  577. sr1lowx6 = sr1low * x6
  578. r1high = r1high_stack
  579. x4 += y2
  580. r0highx0 = r0high * x0
  581. sr2low = sr2low_stack
  582. h7 = r3highx0 + r0highx6
  583. sr1highx6 = sr1high * x6
  584. sr2high = sr2high_stack
  585. x3 += y1
  586. r1lowx0 = r1low * x0
  587. r2low = r2low_stack
  588. h0 = r0lowx0 + sr1lowx6
  589. sr2lowx6 = sr2low * x6
  590. r2high = r2high_stack
  591. x2 += y0
  592. r1highx0 = r1high * x0
  593. sr3low = sr3low_stack
  594. h1 = r0highx0 + sr1highx6
  595. sr2highx6 = sr2high * x6
  596. sr3high = sr3high_stack
  597. x4 += x5
  598. r2lowx0 = r2low * x0
  599. h2 = r1lowx0 + sr2lowx6
  600. sr3lowx6 = sr3low * x6
  601. x2 += x3
  602. r2highx0 = r2high * x0
  603. h3 = r1highx0 + sr2highx6
  604. sr3highx6 = sr3high * x6
  605. r1highx4 = r1high * x4
  606. h4 = r2lowx0 + sr3lowx6
  607. r1lowx4 = r1low * x4
  608. r0highx4 = r0high * x4
  609. h5 = r2highx0 + sr3highx6
  610. r0lowx4 = r0low * x4
  611. h7 += r1highx4
  612. sr3highx4 = sr3high * x4
  613. h6 += r1lowx4
  614. sr3lowx4 = sr3low * x4
  615. h5 += r0highx4
  616. sr2highx4 = sr2high * x4
  617. h4 += r0lowx4
  618. sr2lowx4 = sr2low * x4
  619. h3 += sr3highx4
  620. r0lowx2 = r0low * x2
  621. h2 += sr3lowx4
  622. r0highx2 = r0high * x2
  623. h1 += sr2highx4
  624. r1lowx2 = r1low * x2
  625. h0 += sr2lowx4
  626. r1highx2 = r1high * x2
  627. h2 += r0lowx2
  628. r2lowx2 = r2low * x2
  629. h3 += r0highx2
  630. r2highx2 = r2high * x2
  631. h4 += r1lowx2
  632. sr3lowx2 = sr3low * x2
  633. h5 += r1highx2
  634. sr3highx2 = sr3high * x2
  635. h6 += r2lowx2
  636. h7 += r2highx2
  637. h0 += sr3lowx2
  638. h1 += sr3highx2
  639. addatmost15bytes:
  640. if l == 0 {
  641. goto nomorebytes
  642. }
  643. lbelow2 = l - 2
  644. lbelow3 = l - 3
  645. lbelow2 >>= 31
  646. lbelow4 = l - 4
  647. m00 = uint32(m[p+0])
  648. lbelow3 >>= 31
  649. p += lbelow2
  650. m01 = uint32(m[p+1])
  651. lbelow4 >>= 31
  652. p += lbelow3
  653. m02 = uint32(m[p+2])
  654. p += lbelow4
  655. m0 = 2151
  656. m03 = uint32(m[p+3])
  657. m0 <<= 51
  658. m1 = 2215
  659. m0 += int64(m00)
  660. m01 &^= uint32(lbelow2)
  661. m02 &^= uint32(lbelow3)
  662. m01 -= uint32(lbelow2)
  663. m01 <<= 8
  664. m03 &^= uint32(lbelow4)
  665. m0 += int64(m01)
  666. lbelow2 -= lbelow3
  667. m02 += uint32(lbelow2)
  668. lbelow3 -= lbelow4
  669. m02 <<= 16
  670. m03 += uint32(lbelow3)
  671. m03 <<= 24
  672. m0 += int64(m02)
  673. m0 += int64(m03)
  674. lbelow5 = l - 5
  675. lbelow6 = l - 6
  676. lbelow7 = l - 7
  677. lbelow5 >>= 31
  678. lbelow8 = l - 8
  679. lbelow6 >>= 31
  680. p += lbelow5
  681. m10 = uint32(m[p+4])
  682. lbelow7 >>= 31
  683. p += lbelow6
  684. m11 = uint32(m[p+5])
  685. lbelow8 >>= 31
  686. p += lbelow7
  687. m12 = uint32(m[p+6])
  688. m1 <<= 51
  689. p += lbelow8
  690. m13 = uint32(m[p+7])
  691. m10 &^= uint32(lbelow5)
  692. lbelow4 -= lbelow5
  693. m10 += uint32(lbelow4)
  694. lbelow5 -= lbelow6
  695. m11 &^= uint32(lbelow6)
  696. m11 += uint32(lbelow5)
  697. m11 <<= 8
  698. m1 += int64(m10)
  699. m1 += int64(m11)
  700. m12 &^= uint32(lbelow7)
  701. lbelow6 -= lbelow7
  702. m13 &^= uint32(lbelow8)
  703. m12 += uint32(lbelow6)
  704. lbelow7 -= lbelow8
  705. m12 <<= 16
  706. m13 += uint32(lbelow7)
  707. m13 <<= 24
  708. m1 += int64(m12)
  709. m1 += int64(m13)
  710. m2 = 2279
  711. lbelow9 = l - 9
  712. m3 = 2343
  713. lbelow10 = l - 10
  714. lbelow11 = l - 11
  715. lbelow9 >>= 31
  716. lbelow12 = l - 12
  717. lbelow10 >>= 31
  718. p += lbelow9
  719. m20 = uint32(m[p+8])
  720. lbelow11 >>= 31
  721. p += lbelow10
  722. m21 = uint32(m[p+9])
  723. lbelow12 >>= 31
  724. p += lbelow11
  725. m22 = uint32(m[p+10])
  726. m2 <<= 51
  727. p += lbelow12
  728. m23 = uint32(m[p+11])
  729. m20 &^= uint32(lbelow9)
  730. lbelow8 -= lbelow9
  731. m20 += uint32(lbelow8)
  732. lbelow9 -= lbelow10
  733. m21 &^= uint32(lbelow10)
  734. m21 += uint32(lbelow9)
  735. m21 <<= 8
  736. m2 += int64(m20)
  737. m2 += int64(m21)
  738. m22 &^= uint32(lbelow11)
  739. lbelow10 -= lbelow11
  740. m23 &^= uint32(lbelow12)
  741. m22 += uint32(lbelow10)
  742. lbelow11 -= lbelow12
  743. m22 <<= 16
  744. m23 += uint32(lbelow11)
  745. m23 <<= 24
  746. m2 += int64(m22)
  747. m3 <<= 51
  748. lbelow13 = l - 13
  749. lbelow13 >>= 31
  750. lbelow14 = l - 14
  751. lbelow14 >>= 31
  752. p += lbelow13
  753. lbelow15 = l - 15
  754. m30 = uint32(m[p+12])
  755. lbelow15 >>= 31
  756. p += lbelow14
  757. m31 = uint32(m[p+13])
  758. p += lbelow15
  759. m2 += int64(m23)
  760. m32 = uint32(m[p+14])
  761. m30 &^= uint32(lbelow13)
  762. lbelow12 -= lbelow13
  763. m30 += uint32(lbelow12)
  764. lbelow13 -= lbelow14
  765. m3 += int64(m30)
  766. m31 &^= uint32(lbelow14)
  767. m31 += uint32(lbelow13)
  768. m32 &^= uint32(lbelow15)
  769. m31 <<= 8
  770. lbelow14 -= lbelow15
  771. m3 += int64(m31)
  772. m32 += uint32(lbelow14)
  773. d0 = m0
  774. m32 <<= 16
  775. m33 = uint64(lbelow15 + 1)
  776. d1 = m1
  777. m33 <<= 24
  778. m3 += int64(m32)
  779. d2 = m2
  780. m3 += int64(m33)
  781. d3 = m3
  782. z3 = math.Float64frombits(uint64(d3))
  783. z2 = math.Float64frombits(uint64(d2))
  784. z1 = math.Float64frombits(uint64(d1))
  785. z0 = math.Float64frombits(uint64(d0))
  786. z3 -= alpha96
  787. z2 -= alpha64
  788. z1 -= alpha32
  789. z0 -= alpha0
  790. h5 += z3
  791. h3 += z2
  792. h1 += z1
  793. h0 += z0
  794. y7 = h7 + alpha130
  795. y6 = h6 + alpha130
  796. y1 = h1 + alpha32
  797. y0 = h0 + alpha32
  798. y7 -= alpha130
  799. y6 -= alpha130
  800. y1 -= alpha32
  801. y0 -= alpha32
  802. y5 = h5 + alpha96
  803. y4 = h4 + alpha96
  804. x7 = h7 - y7
  805. y7 *= scale
  806. x6 = h6 - y6
  807. y6 *= scale
  808. x1 = h1 - y1
  809. x0 = h0 - y0
  810. y5 -= alpha96
  811. y4 -= alpha96
  812. x1 += y7
  813. x0 += y6
  814. x7 += y5
  815. x6 += y4
  816. y3 = h3 + alpha64
  817. y2 = h2 + alpha64
  818. x0 += x1
  819. x6 += x7
  820. y3 -= alpha64
  821. r3low = r3low_stack
  822. y2 -= alpha64
  823. r0low = r0low_stack
  824. x5 = h5 - y5
  825. r3lowx0 = r3low * x0
  826. r3high = r3high_stack
  827. x4 = h4 - y4
  828. r0lowx6 = r0low * x6
  829. r0high = r0high_stack
  830. x3 = h3 - y3
  831. r3highx0 = r3high * x0
  832. sr1low = sr1low_stack
  833. x2 = h2 - y2
  834. r0highx6 = r0high * x6
  835. sr1high = sr1high_stack
  836. x5 += y3
  837. r0lowx0 = r0low * x0
  838. r1low = r1low_stack
  839. h6 = r3lowx0 + r0lowx6
  840. sr1lowx6 = sr1low * x6
  841. r1high = r1high_stack
  842. x4 += y2
  843. r0highx0 = r0high * x0
  844. sr2low = sr2low_stack
  845. h7 = r3highx0 + r0highx6
  846. sr1highx6 = sr1high * x6
  847. sr2high = sr2high_stack
  848. x3 += y1
  849. r1lowx0 = r1low * x0
  850. r2low = r2low_stack
  851. h0 = r0lowx0 + sr1lowx6
  852. sr2lowx6 = sr2low * x6
  853. r2high = r2high_stack
  854. x2 += y0
  855. r1highx0 = r1high * x0
  856. sr3low = sr3low_stack
  857. h1 = r0highx0 + sr1highx6
  858. sr2highx6 = sr2high * x6
  859. sr3high = sr3high_stack
  860. x4 += x5
  861. r2lowx0 = r2low * x0
  862. h2 = r1lowx0 + sr2lowx6
  863. sr3lowx6 = sr3low * x6
  864. x2 += x3
  865. r2highx0 = r2high * x0
  866. h3 = r1highx0 + sr2highx6
  867. sr3highx6 = sr3high * x6
  868. r1highx4 = r1high * x4
  869. h4 = r2lowx0 + sr3lowx6
  870. r1lowx4 = r1low * x4
  871. r0highx4 = r0high * x4
  872. h5 = r2highx0 + sr3highx6
  873. r0lowx4 = r0low * x4
  874. h7 += r1highx4
  875. sr3highx4 = sr3high * x4
  876. h6 += r1lowx4
  877. sr3lowx4 = sr3low * x4
  878. h5 += r0highx4
  879. sr2highx4 = sr2high * x4
  880. h4 += r0lowx4
  881. sr2lowx4 = sr2low * x4
  882. h3 += sr3highx4
  883. r0lowx2 = r0low * x2
  884. h2 += sr3lowx4
  885. r0highx2 = r0high * x2
  886. h1 += sr2highx4
  887. r1lowx2 = r1low * x2
  888. h0 += sr2lowx4
  889. r1highx2 = r1high * x2
  890. h2 += r0lowx2
  891. r2lowx2 = r2low * x2
  892. h3 += r0highx2
  893. r2highx2 = r2high * x2
  894. h4 += r1lowx2
  895. sr3lowx2 = sr3low * x2
  896. h5 += r1highx2
  897. sr3highx2 = sr3high * x2
  898. h6 += r2lowx2
  899. h7 += r2highx2
  900. h0 += sr3lowx2
  901. h1 += sr3highx2
  902. nomorebytes:
  903. y7 = h7 + alpha130
  904. y0 = h0 + alpha32
  905. y1 = h1 + alpha32
  906. y2 = h2 + alpha64
  907. y7 -= alpha130
  908. y3 = h3 + alpha64
  909. y4 = h4 + alpha96
  910. y5 = h5 + alpha96
  911. x7 = h7 - y7
  912. y7 *= scale
  913. y0 -= alpha32
  914. y1 -= alpha32
  915. y2 -= alpha64
  916. h6 += x7
  917. y3 -= alpha64
  918. y4 -= alpha96
  919. y5 -= alpha96
  920. y6 = h6 + alpha130
  921. x0 = h0 - y0
  922. x1 = h1 - y1
  923. x2 = h2 - y2
  924. y6 -= alpha130
  925. x0 += y7
  926. x3 = h3 - y3
  927. x4 = h4 - y4
  928. x5 = h5 - y5
  929. x6 = h6 - y6
  930. y6 *= scale
  931. x2 += y0
  932. x3 += y1
  933. x4 += y2
  934. x0 += y6
  935. x5 += y3
  936. x6 += y4
  937. x2 += x3
  938. x0 += x1
  939. x4 += x5
  940. x6 += y5
  941. x2 += offset1
  942. d1 = int64(math.Float64bits(x2))
  943. x0 += offset0
  944. d0 = int64(math.Float64bits(x0))
  945. x4 += offset2
  946. d2 = int64(math.Float64bits(x4))
  947. x6 += offset3
  948. d3 = int64(math.Float64bits(x6))
  949. f0 = uint64(d0)
  950. f1 = uint64(d1)
  951. bits32 = math.MaxUint64
  952. f2 = uint64(d2)
  953. bits32 >>= 32
  954. f3 = uint64(d3)
  955. f = f0 >> 32
  956. f0 &= bits32
  957. f &= 255
  958. f1 += f
  959. g0 = f0 + 5
  960. g = g0 >> 32
  961. g0 &= bits32
  962. f = f1 >> 32
  963. f1 &= bits32
  964. f &= 255
  965. g1 = f1 + g
  966. g = g1 >> 32
  967. f2 += f
  968. f = f2 >> 32
  969. g1 &= bits32
  970. f2 &= bits32
  971. f &= 255
  972. f3 += f
  973. g2 = f2 + g
  974. g = g2 >> 32
  975. g2 &= bits32
  976. f4 = f3 >> 32
  977. f3 &= bits32
  978. f4 &= 255
  979. g3 = f3 + g
  980. g = g3 >> 32
  981. g3 &= bits32
  982. g4 = f4 + g
  983. g4 = g4 - 4
  984. s00 = uint32(s[0])
  985. f = uint64(int64(g4) >> 63)
  986. s01 = uint32(s[1])
  987. f0 &= f
  988. g0 &^= f
  989. s02 = uint32(s[2])
  990. f1 &= f
  991. f0 |= g0
  992. s03 = uint32(s[3])
  993. g1 &^= f
  994. f2 &= f
  995. s10 = uint32(s[4])
  996. f3 &= f
  997. g2 &^= f
  998. s11 = uint32(s[5])
  999. g3 &^= f
  1000. f1 |= g1
  1001. s12 = uint32(s[6])
  1002. f2 |= g2
  1003. f3 |= g3
  1004. s13 = uint32(s[7])
  1005. s01 <<= 8
  1006. f0 += uint64(s00)
  1007. s20 = uint32(s[8])
  1008. s02 <<= 16
  1009. f0 += uint64(s01)
  1010. s21 = uint32(s[9])
  1011. s03 <<= 24
  1012. f0 += uint64(s02)
  1013. s22 = uint32(s[10])
  1014. s11 <<= 8
  1015. f1 += uint64(s10)
  1016. s23 = uint32(s[11])
  1017. s12 <<= 16
  1018. f1 += uint64(s11)
  1019. s30 = uint32(s[12])
  1020. s13 <<= 24
  1021. f1 += uint64(s12)
  1022. s31 = uint32(s[13])
  1023. f0 += uint64(s03)
  1024. f1 += uint64(s13)
  1025. s32 = uint32(s[14])
  1026. s21 <<= 8
  1027. f2 += uint64(s20)
  1028. s33 = uint32(s[15])
  1029. s22 <<= 16
  1030. f2 += uint64(s21)
  1031. s23 <<= 24
  1032. f2 += uint64(s22)
  1033. s31 <<= 8
  1034. f3 += uint64(s30)
  1035. s32 <<= 16
  1036. f3 += uint64(s31)
  1037. s33 <<= 24
  1038. f3 += uint64(s32)
  1039. f2 += uint64(s23)
  1040. f3 += uint64(s33)
  1041. out[0] = byte(f0)
  1042. f0 >>= 8
  1043. out[1] = byte(f0)
  1044. f0 >>= 8
  1045. out[2] = byte(f0)
  1046. f0 >>= 8
  1047. out[3] = byte(f0)
  1048. f0 >>= 8
  1049. f1 += f0
  1050. out[4] = byte(f1)
  1051. f1 >>= 8
  1052. out[5] = byte(f1)
  1053. f1 >>= 8
  1054. out[6] = byte(f1)
  1055. f1 >>= 8
  1056. out[7] = byte(f1)
  1057. f1 >>= 8
  1058. f2 += f1
  1059. out[8] = byte(f2)
  1060. f2 >>= 8
  1061. out[9] = byte(f2)
  1062. f2 >>= 8
  1063. out[10] = byte(f2)
  1064. f2 >>= 8
  1065. out[11] = byte(f2)
  1066. f2 >>= 8
  1067. f3 += f2
  1068. out[12] = byte(f3)
  1069. f3 >>= 8
  1070. out[13] = byte(f3)
  1071. f3 >>= 8
  1072. out[14] = byte(f3)
  1073. f3 >>= 8
  1074. out[15] = byte(f3)
  1075. }