// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build amd64,!gccgo,!appengine

#include "textflag.h"
  6. #define POLY1305_ADD(msg, h0, h1, h2) \
  7. ADDQ 0(msg), h0; \
  8. ADCQ 8(msg), h1; \
  9. ADCQ $1, h2; \
  10. LEAQ 16(msg), msg
  11. #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
  12. MOVQ r0, AX; \
  13. MULQ h0; \
  14. MOVQ AX, t0; \
  15. MOVQ DX, t1; \
  16. MOVQ r0, AX; \
  17. MULQ h1; \
  18. ADDQ AX, t1; \
  19. ADCQ $0, DX; \
  20. MOVQ r0, t2; \
  21. IMULQ h2, t2; \
  22. ADDQ DX, t2; \
  23. \
  24. MOVQ r1, AX; \
  25. MULQ h0; \
  26. ADDQ AX, t1; \
  27. ADCQ $0, DX; \
  28. MOVQ DX, h0; \
  29. MOVQ r1, t3; \
  30. IMULQ h2, t3; \
  31. MOVQ r1, AX; \
  32. MULQ h1; \
  33. ADDQ AX, t2; \
  34. ADCQ DX, t3; \
  35. ADDQ h0, t2; \
  36. ADCQ $0, t3; \
  37. \
  38. MOVQ t0, h0; \
  39. MOVQ t1, h1; \
  40. MOVQ t2, h2; \
  41. ANDQ $3, h2; \
  42. MOVQ t2, t0; \
  43. ANDQ $0xFFFFFFFFFFFFFFFC, t0; \
  44. ADDQ t0, h0; \
  45. ADCQ t3, h1; \
  46. ADCQ $0, h2; \
  47. SHRQ $2, t3, t2; \
  48. SHRQ $2, t3; \
  49. ADDQ t2, h0; \
  50. ADCQ t3, h1; \
  51. ADCQ $0, h2
  52. DATA poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
  53. DATA poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
  54. GLOBL poly1305Mask<>(SB), RODATA, $16
  55. // func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key)
  56. TEXT ·poly1305(SB), $0-32
  57. MOVQ out+0(FP), DI
  58. MOVQ m+8(FP), SI
  59. MOVQ mlen+16(FP), R15
  60. MOVQ key+24(FP), AX
  61. MOVQ SP, BP
  62. ANDQ $0xFFFFFFFFFFFFFFF0, SP
  63. SUBQ $32, SP
  64. MOVOU 0(AX), X0
  65. MOVOU 16(AX), X1
  66. MOVOU poly1305Mask<>(SB), X2
  67. PAND X2, X0
  68. MOVO X0, 0(SP)
  69. MOVO X1, 16(SP)
  70. XORQ R8, R8 // h0
  71. XORQ R9, R9 // h1
  72. XORQ R10, R10 // h2
  73. MOVQ 0(SP), R11 // r0
  74. MOVQ 8(SP), R12 // r1
  75. CMPQ R15, $16
  76. JB bytes_between_0_and_15
  77. loop:
  78. POLY1305_ADD(SI, R8, R9, R10)
  79. multiply:
  80. POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
  81. SUBQ $16, R15
  82. CMPQ R15, $16
  83. JAE loop
  84. bytes_between_0_and_15:
  85. TESTQ R15, R15
  86. JZ done
  87. MOVQ $1, BX
  88. XORQ CX, CX
  89. XORQ R13, R13
  90. ADDQ R15, SI
  91. flush_buffer:
  92. SHLQ $8, BX, CX
  93. SHLQ $8, BX
  94. MOVB -1(SI), R13
  95. XORQ R13, BX
  96. DECQ SI
  97. DECQ R15
  98. JNZ flush_buffer
  99. ADDQ BX, R8
  100. ADCQ CX, R9
  101. ADCQ $0, R10
  102. MOVQ $16, R15
  103. JMP multiply
  104. done:
  105. MOVQ R8, AX
  106. MOVQ R9, BX
  107. SUBQ $0xFFFFFFFFFFFFFFFB, AX
  108. SBBQ $0xFFFFFFFFFFFFFFFF, BX
  109. SBBQ $3, R10
  110. CMOVQCS R8, AX
  111. CMOVQCS R9, BX
  112. ADDQ 16(SP), AX
  113. ADCQ 24(SP), BX
  114. MOVQ BP, SP
  115. MOVQ AX, 0(DI)
  116. MOVQ BX, 8(DI)
  117. RET