// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !appengine
// +build gc
// +build !noasm
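
// (These build constraints select the assembly only for the gc toolchain,
// outside App Engine, and only when the "noasm" build tag is not set; the pure
// Go code in encode_other.go is used otherwise.)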

#include "textflag.h"

// The asm code generally follows the pure Go code in encode_other.go, except
// where marked with a "!!!".

// ----------------------------------------------------------------------------

// func emitCopy(dst []byte, offset, length int) int
//
// All local variables fit into registers. The register allocation:
//	- BX	offset
//	- CX	length
//	- SI	&dst[0]
//	- DI	&dst[i]
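//
// For reference, the two Snappy copy encodings emitted below (as implied by
// the instruction sequences that follow; see also encode_other.go):
//	- 3 bytes: a tag byte (length-1)<<2 | 0b10, then the offset as a
//	  little-endian uint16, for lengths up to 64.
//	- 2 bytes: a tag byte (offset>>8)<<5 | (length-4)<<2 | 0b01, then the low
//	  8 bits of the offset, for lengths in [4, 11] and offsets below 2048.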
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	MOVQ dst_base+0(FP), DI
	MOVQ DI, SI
	MOVQ offset+24(FP), BX
	MOVQ length+32(FP), CX

loop0:
	// for length >= 68 { etc }
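	// (68 is 64 + 4: a full length-64 copy is emitted only while the remaining
	// length would stay at least 4, the minimum that the 2-byte copy form
	// below can encode.)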
	CMPL CX, $68
	JLT  step1

	// Emit a length 64 copy, encoded as 3 bytes.
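	// 0xfe is (64-1)<<2 | 0b10; the MOVW stores the 16-bit offset little-endian.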
	MOVB $0xfe, 0(DI)
	MOVW BX, 1(DI)
	ADDQ $3, DI
	SUBL $64, CX
	JMP  loop0

step1:
	// if length > 64 { etc }
	CMPL CX, $64
	JLE  step2

	// Emit a length 60 copy, encoded as 3 bytes.
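	// 0xee is (60-1)<<2 | 0b10. Emitting 60 rather than 64 here likewise leaves
	// a remaining length of at least 4 for the final copy element.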
	MOVB $0xee, 0(DI)
	MOVW BX, 1(DI)
	ADDQ $3, DI
	SUBL $60, CX

step2:
	// if length >= 12 || offset >= 2048 { goto step3 }
	CMPL CX, $12
	JGE  step3
	CMPL BX, $2048
	JGE  step3

	// Emit the remaining copy, encoded as 2 bytes.
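	// The low 8 bits of the offset are stored at 1(DI) first, before BX is
	// reused to build the tag byte (offset>>8)<<5 | (length-4)<<2 | 0b01 that
	// goes at 0(DI).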
	MOVB BX, 1(DI)
	SHRL $8, BX
	SHLB $5, BX
	SUBB $4, CX
	SHLB $2, CX
	ORB  CX, BX
	ORB  $1, BX
	MOVB BX, 0(DI)
	ADDQ $2, DI

	// Return the number of bytes written.
	SUBQ SI, DI
	MOVQ DI, ret+40(FP)
	RET

step3:
	// Emit the remaining copy, encoded as 3 bytes.
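	// The tag byte is (length-1)<<2 | 0b10, followed by the 16-bit
	// little-endian offset.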
	SUBL $1, CX
	SHLB $2, CX
	ORB  $2, CX
	MOVB CX, 0(DI)
	MOVW BX, 1(DI)
	ADDQ $3, DI

	// Return the number of bytes written.
	SUBQ SI, DI
	MOVQ DI, ret+40(FP)
	RET

// ----------------------------------------------------------------------------

// func extendMatch(src []byte, i, j int) int
//
// All local variables fit into registers. The register allocation:
//	- CX	&src[0]
//	- DX	&src[len(src)]
//	- SI	&src[i]
//	- DI	&src[j]
//	- R9	&src[len(src) - 8]
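//
// extendMatch returns the largest k <= len(src) such that src[i:i+k-j] and
// src[j:k] are equal, i.e. it extends a candidate match forward while the
// bytes match. (See encode_other.go for the pure Go version and its
// preconditions.)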
TEXT ·extendMatch(SB), NOSPLIT, $0-48
	MOVQ src_base+0(FP), CX
	MOVQ src_len+8(FP), DX
	MOVQ i+24(FP), SI
	MOVQ j+32(FP), DI
	ADDQ CX, DX
	ADDQ CX, SI
	ADDQ CX, DI
	MOVQ DX, R9
	SUBQ $8, R9

cmp8:
	// As long as we are 8 or more bytes before the end of src, we can load and
	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
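	// (The CMPQ/JA pair is an unsigned pointer comparison: once DI has passed
	// &src[len(src)-8], fall back to the byte-at-a-time loop below.)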
	CMPQ DI, R9
	JA   cmp1
	MOVQ (SI), AX
	MOVQ (DI), BX
	CMPQ AX, BX
	JNE  bsf
	ADDQ $8, SI
	ADDQ $8, DI
	JMP  cmp8

bsf:
	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
	// the index of the first byte that differs. The BSF instruction finds the
	// least significant 1 bit, the amd64 architecture is little-endian, and
	// the shift by 3 converts a bit index to a byte index.
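	// For example, if the two loaded words agree in their low three bytes and
	// differ in the fourth, the XOR below is zero in bits 0-23 and non-zero
	// somewhere in bits 24-31, so BSF returns a bit index in [24, 31] and the
	// shift by 3 yields byte index 3.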
	XORQ AX, BX
	BSFQ BX, BX
	SHRQ $3, BX
	ADDQ BX, DI

	// Convert from &src[ret] to ret.
	SUBQ CX, DI
	MOVQ DI, ret+40(FP)
	RET

cmp1:
	// In src's tail, compare 1 byte at a time.
	CMPQ DI, DX
	JAE  end
	MOVB (SI), AX
	MOVB (DI), BX
	CMPB AX, BX
	JNE  end
	ADDQ $1, SI
	ADDQ $1, DI
	JMP  cmp1

end:
	// Convert from &src[ret] to ret.
	SUBQ CX, DI
	MOVQ DI, ret+40(FP)
	RET