sum_amd64.s 3.3 KB

// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build amd64,!gccgo,!appengine

  5. #include "textflag.h"
  6. #define POLY1305_ADD(msg, h0, h1, h2) \
  7. ADDQ 0(msg), h0; \
  8. ADCQ 8(msg), h1; \
  9. ADCQ $1, h2; \
  10. LEAQ 16(msg), msg
  11. #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
  12. MOVQ r0, AX; \
  13. MULQ h0; \
  14. MOVQ AX, t0; \
  15. MOVQ DX, t1; \
  16. MOVQ r0, AX; \
  17. MULQ h1; \
  18. ADDQ AX, t1; \
  19. ADCQ $0, DX; \
  20. MOVQ r0, t2; \
  21. IMULQ h2, t2; \
  22. ADDQ DX, t2; \
  23. \
  24. MOVQ r1, AX; \
  25. MULQ h0; \
  26. ADDQ AX, t1; \
  27. ADCQ $0, DX; \
  28. MOVQ DX, h0; \
  29. MOVQ r1, t3; \
  30. IMULQ h2, t3; \
  31. MOVQ r1, AX; \
  32. MULQ h1; \
  33. ADDQ AX, t2; \
  34. ADCQ DX, t3; \
  35. ADDQ h0, t2; \
  36. ADCQ $0, t3; \
  37. \
  38. MOVQ t0, h0; \
  39. MOVQ t1, h1; \
  40. MOVQ t2, h2; \
  41. ANDQ $3, h2; \
  42. MOVQ t2, t0; \
  43. ANDQ $0xFFFFFFFFFFFFFFFC, t0; \
  44. ADDQ t0, h0; \
  45. ADCQ t3, h1; \
  46. ADCQ $0, h2; \
  47. SHRQ $2, t3, t2; \
  48. SHRQ $2, t3; \
  49. ADDQ t2, h0; \
  50. ADCQ t3, h1; \
  51. ADCQ $0, h2
  52. DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
  53. DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
  54. GLOBL ·poly1305Mask<>(SB), RODATA, $16
  55. // func update(state *[7]uint64, msg []byte)
  56. TEXT ·update(SB), $0-32
  57. MOVQ state+0(FP), DI
  58. MOVQ msg_base+8(FP), SI
  59. MOVQ msg_len+16(FP), R15
  60. MOVQ 0(DI), R8 // h0
  61. MOVQ 8(DI), R9 // h1
  62. MOVQ 16(DI), R10 // h2
  63. MOVQ 24(DI), R11 // r0
  64. MOVQ 32(DI), R12 // r1
  65. CMPQ R15, $16
  66. JB bytes_between_0_and_15
  67. loop:
  68. POLY1305_ADD(SI, R8, R9, R10)
  69. multiply:
  70. POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
  71. SUBQ $16, R15
  72. CMPQ R15, $16
  73. JAE loop
  74. bytes_between_0_and_15:
  75. TESTQ R15, R15
  76. JZ done
  77. MOVQ $1, BX
  78. XORQ CX, CX
  79. XORQ R13, R13
  80. ADDQ R15, SI
  81. flush_buffer:
  82. SHLQ $8, BX, CX
  83. SHLQ $8, BX
  84. MOVB -1(SI), R13
  85. XORQ R13, BX
  86. DECQ SI
  87. DECQ R15
  88. JNZ flush_buffer
  89. ADDQ BX, R8
  90. ADCQ CX, R9
  91. ADCQ $0, R10
  92. MOVQ $16, R15
  93. JMP multiply
  94. done:
  95. MOVQ R8, 0(DI)
  96. MOVQ R9, 8(DI)
  97. MOVQ R10, 16(DI)
  98. RET
  99. // func initialize(state *[7]uint64, key *[32]byte)
  100. TEXT ·initialize(SB), $0-16
  101. MOVQ state+0(FP), DI
  102. MOVQ key+8(FP), SI
  103. // state[0...7] is initialized with zero
  104. MOVOU 0(SI), X0
  105. MOVOU 16(SI), X1
  106. MOVOU ·poly1305Mask<>(SB), X2
  107. PAND X2, X0
  108. MOVOU X0, 24(DI)
  109. MOVOU X1, 40(DI)
  110. RET
  111. // func finalize(tag *[TagSize]byte, state *[7]uint64)
  112. TEXT ·finalize(SB), $0-16
  113. MOVQ tag+0(FP), DI
  114. MOVQ state+8(FP), SI
  115. MOVQ 0(SI), AX
  116. MOVQ 8(SI), BX
  117. MOVQ 16(SI), CX
  118. MOVQ AX, R8
  119. MOVQ BX, R9
  120. SUBQ $0xFFFFFFFFFFFFFFFB, AX
  121. SBBQ $0xFFFFFFFFFFFFFFFF, BX
  122. SBBQ $3, CX
  123. CMOVQCS R8, AX
  124. CMOVQCS R9, BX
  125. ADDQ 40(SI), AX
  126. ADCQ 48(SI), BX
  127. MOVQ AX, 0(DI)
  128. MOVQ BX, 8(DI)
  129. RET