mont25519_amd64.go 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package curve25519
// These functions are implemented in the .s files. The names of the functions
// in the rest of the file are also taken from the SUPERCOP sources to help
// people following along.

// cswap is called by mladder once per scalar bit with a pointer to work[1]
// and a flag that is 0 or 1.
// NOTE(review): presumably a constant-time conditional swap of the ladder
// state starting at that element; confirm against the assembly.
func cswap(*[5]uint64, uint64)

// ladderstep is called by mladder once per scalar bit on the whole 5x5 work
// array, which it updates in place.
// NOTE(review): presumably one combined double-and-add Montgomery ladder
// step; confirm against the assembly.
func ladderstep(*[5][5]uint64)

// freeze modifies inout in place. pack calls it on a copy of the field element
// before serialising the limbs to bytes.
// NOTE(review): presumably performs the full reduction to the canonical
// representative; confirm against the assembly.
func freeze(inout *[5]uint64)

// mul writes a result derived from a and b to dest. Callers in this file pass
// dest aliasing a (e.g. mul(&t, &t, &z)), so the implementation must tolerate
// that.
// NOTE(review): presumably dest = a * b in the field; confirm against the
// assembly.
func mul(dest, a, b *[5]uint64)

// square writes a result derived from in to out. Callers in this file pass
// out aliasing in (e.g. square(&t, &t)), so the implementation must tolerate
// that.
// NOTE(review): presumably out = in^2 in the field; confirm against the
// assembly.
func square(out, in *[5]uint64)
  13. // mladder uses a Montgomery ladder to calculate (xr/zr) *= s.
  14. func mladder(xr, zr *[5]uint64, s *[32]byte) {
  15. var work [5][5]uint64
  16. work[0] = *xr
  17. setint(&work[1], 1)
  18. setint(&work[2], 0)
  19. work[3] = *xr
  20. setint(&work[4], 1)
  21. j := uint(6)
  22. var prevbit byte
  23. for i := 31; i >= 0; i-- {
  24. for j < 8 {
  25. bit := ((*s)[i] >> j) & 1
  26. swap := bit ^ prevbit
  27. prevbit = bit
  28. cswap(&work[1], uint64(swap))
  29. ladderstep(&work)
  30. j--
  31. }
  32. j = 7
  33. }
  34. *xr = work[1]
  35. *zr = work[2]
  36. }
  37. func scalarMult(out, in, base *[32]byte) {
  38. var e [32]byte
  39. copy(e[:], (*in)[:])
  40. e[0] &= 248
  41. e[31] &= 127
  42. e[31] |= 64
  43. var t, z [5]uint64
  44. unpack(&t, base)
  45. mladder(&t, &z, &e)
  46. invert(&z, &z)
  47. mul(&t, &t, &z)
  48. pack(out, &t)
  49. }
  50. func setint(r *[5]uint64, v uint64) {
  51. r[0] = v
  52. r[1] = 0
  53. r[2] = 0
  54. r[3] = 0
  55. r[4] = 0
  56. }
  57. // unpack sets r = x where r consists of 5, 51-bit limbs in little-endian
  58. // order.
  59. func unpack(r *[5]uint64, x *[32]byte) {
  60. r[0] = uint64(x[0]) |
  61. uint64(x[1])<<8 |
  62. uint64(x[2])<<16 |
  63. uint64(x[3])<<24 |
  64. uint64(x[4])<<32 |
  65. uint64(x[5])<<40 |
  66. uint64(x[6]&7)<<48
  67. r[1] = uint64(x[6])>>3 |
  68. uint64(x[7])<<5 |
  69. uint64(x[8])<<13 |
  70. uint64(x[9])<<21 |
  71. uint64(x[10])<<29 |
  72. uint64(x[11])<<37 |
  73. uint64(x[12]&63)<<45
  74. r[2] = uint64(x[12])>>6 |
  75. uint64(x[13])<<2 |
  76. uint64(x[14])<<10 |
  77. uint64(x[15])<<18 |
  78. uint64(x[16])<<26 |
  79. uint64(x[17])<<34 |
  80. uint64(x[18])<<42 |
  81. uint64(x[19]&1)<<50
  82. r[3] = uint64(x[19])>>1 |
  83. uint64(x[20])<<7 |
  84. uint64(x[21])<<15 |
  85. uint64(x[22])<<23 |
  86. uint64(x[23])<<31 |
  87. uint64(x[24])<<39 |
  88. uint64(x[25]&15)<<47
  89. r[4] = uint64(x[25])>>4 |
  90. uint64(x[26])<<4 |
  91. uint64(x[27])<<12 |
  92. uint64(x[28])<<20 |
  93. uint64(x[29])<<28 |
  94. uint64(x[30])<<36 |
  95. uint64(x[31]&127)<<44
  96. }
  97. // pack sets out = x where out is the usual, little-endian form of the 5,
  98. // 51-bit limbs in x.
  99. func pack(out *[32]byte, x *[5]uint64) {
  100. t := *x
  101. freeze(&t)
  102. out[0] = byte(t[0])
  103. out[1] = byte(t[0] >> 8)
  104. out[2] = byte(t[0] >> 16)
  105. out[3] = byte(t[0] >> 24)
  106. out[4] = byte(t[0] >> 32)
  107. out[5] = byte(t[0] >> 40)
  108. out[6] = byte(t[0] >> 48)
  109. out[6] ^= byte(t[1]<<3) & 0xf8
  110. out[7] = byte(t[1] >> 5)
  111. out[8] = byte(t[1] >> 13)
  112. out[9] = byte(t[1] >> 21)
  113. out[10] = byte(t[1] >> 29)
  114. out[11] = byte(t[1] >> 37)
  115. out[12] = byte(t[1] >> 45)
  116. out[12] ^= byte(t[2]<<6) & 0xc0
  117. out[13] = byte(t[2] >> 2)
  118. out[14] = byte(t[2] >> 10)
  119. out[15] = byte(t[2] >> 18)
  120. out[16] = byte(t[2] >> 26)
  121. out[17] = byte(t[2] >> 34)
  122. out[18] = byte(t[2] >> 42)
  123. out[19] = byte(t[2] >> 50)
  124. out[19] ^= byte(t[3]<<1) & 0xfe
  125. out[20] = byte(t[3] >> 7)
  126. out[21] = byte(t[3] >> 15)
  127. out[22] = byte(t[3] >> 23)
  128. out[23] = byte(t[3] >> 31)
  129. out[24] = byte(t[3] >> 39)
  130. out[25] = byte(t[3] >> 47)
  131. out[25] ^= byte(t[4]<<4) & 0xf0
  132. out[26] = byte(t[4] >> 4)
  133. out[27] = byte(t[4] >> 12)
  134. out[28] = byte(t[4] >> 20)
  135. out[29] = byte(t[4] >> 28)
  136. out[30] = byte(t[4] >> 36)
  137. out[31] = byte(t[4] >> 44)
  138. }
  139. // invert calculates r = x^-1 mod p using Fermat's little theorem.
  140. func invert(r *[5]uint64, x *[5]uint64) {
  141. var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64
  142. square(&z2, x) /* 2 */
  143. square(&t, &z2) /* 4 */
  144. square(&t, &t) /* 8 */
  145. mul(&z9, &t, x) /* 9 */
  146. mul(&z11, &z9, &z2) /* 11 */
  147. square(&t, &z11) /* 22 */
  148. mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */
  149. square(&t, &z2_5_0) /* 2^6 - 2^1 */
  150. for i := 1; i < 5; i++ { /* 2^20 - 2^10 */
  151. square(&t, &t)
  152. }
  153. mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */
  154. square(&t, &z2_10_0) /* 2^11 - 2^1 */
  155. for i := 1; i < 10; i++ { /* 2^20 - 2^10 */
  156. square(&t, &t)
  157. }
  158. mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */
  159. square(&t, &z2_20_0) /* 2^21 - 2^1 */
  160. for i := 1; i < 20; i++ { /* 2^40 - 2^20 */
  161. square(&t, &t)
  162. }
  163. mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */
  164. square(&t, &t) /* 2^41 - 2^1 */
  165. for i := 1; i < 10; i++ { /* 2^50 - 2^10 */
  166. square(&t, &t)
  167. }
  168. mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */
  169. square(&t, &z2_50_0) /* 2^51 - 2^1 */
  170. for i := 1; i < 50; i++ { /* 2^100 - 2^50 */
  171. square(&t, &t)
  172. }
  173. mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */
  174. square(&t, &z2_100_0) /* 2^101 - 2^1 */
  175. for i := 1; i < 100; i++ { /* 2^200 - 2^100 */
  176. square(&t, &t)
  177. }
  178. mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */
  179. square(&t, &t) /* 2^201 - 2^1 */
  180. for i := 1; i < 50; i++ { /* 2^250 - 2^50 */
  181. square(&t, &t)
  182. }
  183. mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */
  184. square(&t, &t) /* 2^251 - 2^1 */
  185. square(&t, &t) /* 2^252 - 2^2 */
  186. square(&t, &t) /* 2^253 - 2^3 */
  187. square(&t, &t) /* 2^254 - 2^4 */
  188. square(&t, &t) /* 2^255 - 2^5 */
  189. mul(r, &t, &z11) /* 2^255 - 21 */
  190. }