encode_amd64.s 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build !appengine
  5. // +build gc
  6. // +build !noasm
  7. #include "textflag.h"
  8. // The asm code generally follows the pure Go code in encode_other.go, except
  9. // where marked with a "!!!".
  // func extendMatch(src []byte, i, j int) int
  //
  // Walks two cursors forward through src, one starting at i and one at j,
  // for as long as the bytes under them are equal and the j cursor has not
  // reached len(src). The return value is the final position of the j cursor:
  // the first index k >= j at which either k == len(src) or the byte at k
  // differs from the byte at i + (k - j).
  //
  // NOTE(review): only the j cursor (DI) is checked against the end of src.
  // Loads through the i cursor (SI) are in bounds only if i <= j; that
  // precondition is not enforced here - confirm against the callers.
  //
  // Frame layout ($0-48, no locals):
  //   src_base+0(FP)  src_len+8(FP)  (src_cap at 16, unused)
  //   i+24(FP)        j+32(FP)       ret+40(FP)
  //
  // All local variables fit into registers. The register allocation:
  //   - AX  bytes loaded from &src[i]
  //   - BX  bytes loaded from &src[j], then the index of the first differing byte
  //   - CX  &src[0]
  //   - DX  &src[len(src)]
  //   - SI  &src[i]
  //   - DI  &src[j]
  //   - R9  len(src) during setup, then &src[len(src) - 8]
  TEXT ·extendMatch(SB), NOSPLIT, $0-48
      MOVQ src_base+0(FP), CX
      MOVQ src_len+8(FP), R9
      MOVQ i+24(FP), SI
      MOVQ j+32(FP), DI

      // Turn the length and the two indices into absolute addresses.
      MOVQ R9, DX
      ADDQ CX, DX
      ADDQ CX, SI
      ADDQ CX, DI

      // With fewer than 8 bytes in src there is no valid &src[len(src) - 8].
      // For a nil src (base address 0) the subtraction below would wrap around
      // to a huge unsigned value, the cmp8 bounds check would pass, and the
      // 8-byte loads would fault. The byte loop handles short inputs correctly,
      // so go straight to it.
      CMPQ R9, $8
      JB   cmp1

      MOVQ DX, R9
      SUBQ $8, R9

  cmp8:
      // As long as we are 8 or more bytes before the end of src, we can load and
      // compare 8 bytes at a time. If those 8 bytes are equal, repeat.
      CMPQ DI, R9
      JA   cmp1
      MOVQ (SI), AX
      MOVQ (DI), BX
      CMPQ AX, BX
      JNE  bsf
      ADDQ $8, SI
      ADDQ $8, DI
      JMP  cmp8

  cmp1:
      // In src's tail, compare 1 byte at a time. Only the low byte of AX and BX
      // is written by MOVB; CMPB looks at nothing else.
      CMPQ DI, DX
      JAE  end
      MOVB (SI), AX
      MOVB (DI), BX
      CMPB AX, BX
      JNE  end
      ADDQ $1, SI
      ADDQ $1, DI
      JMP  cmp1

  bsf:
      // The 8 bytes just loaded were not equal (so AX ^ BX is non-zero and BSF
      // is well defined). XOR the two 8 byte values, and advance DI to the
      // first byte that differs. The BSF instruction finds the least
      // significant 1 bit, the amd64 architecture is little-endian, and the
      // shift by 3 converts a bit index to a byte index.
      XORQ AX, BX
      BSFQ BX, BX
      SHRQ $3, BX
      ADDQ BX, DI
      // Fall through to the shared epilogue.

  end:
      // Convert from &src[ret] to ret.
      SUBQ CX, DI
      MOVQ DI, ret+40(FP)
      RET