// File: poly1305_amd64.s
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build amd64,!gccgo,!appengine

  5. #include "textflag.h"
  6. #define POLY1305_ADD(msg, h0, h1, h2) \
  7. ADDQ 0(msg), h0; \
  8. ADCQ 8(msg), h1; \
  9. ADCQ $1, h2; \
  10. LEAQ 16(msg), msg
  11. #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
  12. MOVQ r0, AX; \
  13. MULQ h0; \
  14. MOVQ AX, t0; \
  15. MOVQ DX, t1; \
  16. MOVQ r0, AX; \
  17. MULQ h1; \
  18. ADDQ AX, t1; \
  19. ADCQ $0, DX; \
  20. MOVQ r0, t2; \
  21. IMULQ h2, t2; \
  22. ADDQ DX, t2; \
  23. \
  24. MOVQ r1, AX; \
  25. MULQ h0; \
  26. ADDQ AX, t1; \
  27. ADCQ $0, DX; \
  28. MOVQ DX, h0; \
  29. MOVQ r1, t3; \
  30. IMULQ h2, t3; \
  31. MOVQ r1, AX; \
  32. MULQ h1; \
  33. ADDQ AX, t2; \
  34. ADCQ DX, t3; \
  35. ADDQ h0, t2; \
  36. ADCQ $0, t3; \
  37. \
  38. MOVQ t0, h0; \
  39. MOVQ t1, h1; \
  40. MOVQ t2, h2; \
  41. ANDQ $3, h2; \
  42. MOVQ t2, t0; \
  43. ANDQ $0xFFFFFFFFFFFFFFFC, t0; \
  44. ADDQ t0, h0; \
  45. ADCQ t3, h1; \
  46. ADCQ $0, h2; \
  47. SHRQ $2, t3, t2; \
  48. SHRQ $2, t3; \
  49. ADDQ t2, h0; \
  50. ADCQ t3, h1; \
  51. ADCQ $0, h2
  52. DATA poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
  53. DATA poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
  54. GLOBL poly1305Mask<>(SB), RODATA, $16
  55. // func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key)
  56. TEXT ·poly1305(SB), $0-32
  57. MOVQ out+0(FP), DI
  58. MOVQ m+8(FP), SI
  59. MOVQ mlen+16(FP), R15
  60. MOVQ key+24(FP), AX
  61. MOVQ SP, BP
  62. ANDQ $0xFFFFFFFFFFFFFFF0, SP
  63. SUBQ $32, SP
  64. MOVOU 0(AX), X0
  65. MOVOU 16(AX), X1
  66. MOVOU poly1305Mask<>(SB), X2
  67. PAND X2, X0
  68. MOVO X0, 0(SP)
  69. MOVO X1, 16(SP)
  70. XORQ R8, R8 // h0
  71. XORQ R9, R9 // h1
  72. XORQ R10, R10 // h2
  73. MOVQ 0(SP), R11 // r0
  74. MOVQ 8(SP), R12 // r1
  75. CMPQ R15, $16
  76. JB bytes_between_0_and_15
  77. loop:
  78. POLY1305_ADD(SI, R8, R9, R10)
  79. multiply:
  80. POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
  81. SUBQ $16, R15
  82. CMPQ R15, $16
  83. JAE loop
  84. bytes_between_0_and_15:
  85. TESTQ R15, R15
  86. JZ done
  87. MOVQ $1, BX
  88. XORQ CX, CX
  89. XORQ R13, R13
  90. ADDQ R15, SI
  91. flush_buffer:
  92. SHLQ $8, BX, CX
  93. SHLQ $8, BX
  94. MOVB -1(SI), R13
  95. XORQ R13, BX
  96. DECQ SI
  97. DECQ R15
  98. JNZ flush_buffer
  99. ADDQ BX, R8
  100. ADCQ CX, R9
  101. ADCQ $0, R10
  102. MOVQ $16, R15
  103. JMP multiply
  104. done:
  105. MOVQ R8, AX
  106. MOVQ R9, BX
  107. SUBQ $0xFFFFFFFFFFFFFFFB, AX
  108. SBBQ $0xFFFFFFFFFFFFFFFF, BX
  109. CMOVQCS R8, AX
  110. CMOVQCS R9, BX
  111. ADDQ 16(SP), AX
  112. ADCQ 24(SP), BX
  113. MOVQ BP, SP
  114. MOVQ AX, 0(DI)
  115. MOVQ BX, 8(DI)
  116. RET