blamka_generic.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. // Copyright 2017 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package argon2
// useSSE4 is declared here but is neither read nor assigned anywhere in this
// file, so it keeps its zero value (false) as far as this file is concerned.
// NOTE(review): presumably it exists so that code shared with an
// SSE4-capable build still compiles against the generic implementation —
// confirm against the other files in the package.
var useSSE4 bool
  6. func processBlockGeneric(out, in1, in2 *block, xor bool) {
  7. var t block
  8. for i := range t {
  9. t[i] = in1[i] ^ in2[i]
  10. }
  11. for i := 0; i < blockLength; i += 16 {
  12. blamkaGeneric(
  13. &t[i+0], &t[i+1], &t[i+2], &t[i+3],
  14. &t[i+4], &t[i+5], &t[i+6], &t[i+7],
  15. &t[i+8], &t[i+9], &t[i+10], &t[i+11],
  16. &t[i+12], &t[i+13], &t[i+14], &t[i+15],
  17. )
  18. }
  19. for i := 0; i < blockLength/8; i += 2 {
  20. blamkaGeneric(
  21. &t[i], &t[i+1], &t[16+i], &t[16+i+1],
  22. &t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1],
  23. &t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1],
  24. &t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1],
  25. )
  26. }
  27. if xor {
  28. for i := range t {
  29. out[i] ^= in1[i] ^ in2[i] ^ t[i]
  30. }
  31. } else {
  32. for i := range t {
  33. out[i] = in1[i] ^ in2[i] ^ t[i]
  34. }
  35. }
  36. }
  37. func blamkaGeneric(t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15 *uint64) {
  38. v00, v01, v02, v03 := *t00, *t01, *t02, *t03
  39. v04, v05, v06, v07 := *t04, *t05, *t06, *t07
  40. v08, v09, v10, v11 := *t08, *t09, *t10, *t11
  41. v12, v13, v14, v15 := *t12, *t13, *t14, *t15
  42. v00 += v04 + 2*uint64(uint32(v00))*uint64(uint32(v04))
  43. v12 ^= v00
  44. v12 = v12>>32 | v12<<32
  45. v08 += v12 + 2*uint64(uint32(v08))*uint64(uint32(v12))
  46. v04 ^= v08
  47. v04 = v04>>24 | v04<<40
  48. v00 += v04 + 2*uint64(uint32(v00))*uint64(uint32(v04))
  49. v12 ^= v00
  50. v12 = v12>>16 | v12<<48
  51. v08 += v12 + 2*uint64(uint32(v08))*uint64(uint32(v12))
  52. v04 ^= v08
  53. v04 = v04>>63 | v04<<1
  54. v01 += v05 + 2*uint64(uint32(v01))*uint64(uint32(v05))
  55. v13 ^= v01
  56. v13 = v13>>32 | v13<<32
  57. v09 += v13 + 2*uint64(uint32(v09))*uint64(uint32(v13))
  58. v05 ^= v09
  59. v05 = v05>>24 | v05<<40
  60. v01 += v05 + 2*uint64(uint32(v01))*uint64(uint32(v05))
  61. v13 ^= v01
  62. v13 = v13>>16 | v13<<48
  63. v09 += v13 + 2*uint64(uint32(v09))*uint64(uint32(v13))
  64. v05 ^= v09
  65. v05 = v05>>63 | v05<<1
  66. v02 += v06 + 2*uint64(uint32(v02))*uint64(uint32(v06))
  67. v14 ^= v02
  68. v14 = v14>>32 | v14<<32
  69. v10 += v14 + 2*uint64(uint32(v10))*uint64(uint32(v14))
  70. v06 ^= v10
  71. v06 = v06>>24 | v06<<40
  72. v02 += v06 + 2*uint64(uint32(v02))*uint64(uint32(v06))
  73. v14 ^= v02
  74. v14 = v14>>16 | v14<<48
  75. v10 += v14 + 2*uint64(uint32(v10))*uint64(uint32(v14))
  76. v06 ^= v10
  77. v06 = v06>>63 | v06<<1
  78. v03 += v07 + 2*uint64(uint32(v03))*uint64(uint32(v07))
  79. v15 ^= v03
  80. v15 = v15>>32 | v15<<32
  81. v11 += v15 + 2*uint64(uint32(v11))*uint64(uint32(v15))
  82. v07 ^= v11
  83. v07 = v07>>24 | v07<<40
  84. v03 += v07 + 2*uint64(uint32(v03))*uint64(uint32(v07))
  85. v15 ^= v03
  86. v15 = v15>>16 | v15<<48
  87. v11 += v15 + 2*uint64(uint32(v11))*uint64(uint32(v15))
  88. v07 ^= v11
  89. v07 = v07>>63 | v07<<1
  90. v00 += v05 + 2*uint64(uint32(v00))*uint64(uint32(v05))
  91. v15 ^= v00
  92. v15 = v15>>32 | v15<<32
  93. v10 += v15 + 2*uint64(uint32(v10))*uint64(uint32(v15))
  94. v05 ^= v10
  95. v05 = v05>>24 | v05<<40
  96. v00 += v05 + 2*uint64(uint32(v00))*uint64(uint32(v05))
  97. v15 ^= v00
  98. v15 = v15>>16 | v15<<48
  99. v10 += v15 + 2*uint64(uint32(v10))*uint64(uint32(v15))
  100. v05 ^= v10
  101. v05 = v05>>63 | v05<<1
  102. v01 += v06 + 2*uint64(uint32(v01))*uint64(uint32(v06))
  103. v12 ^= v01
  104. v12 = v12>>32 | v12<<32
  105. v11 += v12 + 2*uint64(uint32(v11))*uint64(uint32(v12))
  106. v06 ^= v11
  107. v06 = v06>>24 | v06<<40
  108. v01 += v06 + 2*uint64(uint32(v01))*uint64(uint32(v06))
  109. v12 ^= v01
  110. v12 = v12>>16 | v12<<48
  111. v11 += v12 + 2*uint64(uint32(v11))*uint64(uint32(v12))
  112. v06 ^= v11
  113. v06 = v06>>63 | v06<<1
  114. v02 += v07 + 2*uint64(uint32(v02))*uint64(uint32(v07))
  115. v13 ^= v02
  116. v13 = v13>>32 | v13<<32
  117. v08 += v13 + 2*uint64(uint32(v08))*uint64(uint32(v13))
  118. v07 ^= v08
  119. v07 = v07>>24 | v07<<40
  120. v02 += v07 + 2*uint64(uint32(v02))*uint64(uint32(v07))
  121. v13 ^= v02
  122. v13 = v13>>16 | v13<<48
  123. v08 += v13 + 2*uint64(uint32(v08))*uint64(uint32(v13))
  124. v07 ^= v08
  125. v07 = v07>>63 | v07<<1
  126. v03 += v04 + 2*uint64(uint32(v03))*uint64(uint32(v04))
  127. v14 ^= v03
  128. v14 = v14>>32 | v14<<32
  129. v09 += v14 + 2*uint64(uint32(v09))*uint64(uint32(v14))
  130. v04 ^= v09
  131. v04 = v04>>24 | v04<<40
  132. v03 += v04 + 2*uint64(uint32(v03))*uint64(uint32(v04))
  133. v14 ^= v03
  134. v14 = v14>>16 | v14<<48
  135. v09 += v14 + 2*uint64(uint32(v09))*uint64(uint32(v14))
  136. v04 ^= v09
  137. v04 = v04>>63 | v04<<1
  138. *t00, *t01, *t02, *t03 = v00, v01, v02, v03
  139. *t04, *t05, *t06, *t07 = v04, v05, v06, v07
  140. *t08, *t09, *t10, *t11 = v08, v09, v10, v11
  141. *t12, *t13, *t14, *t15 = v12, v13, v14, v15
  142. }