// decode_arm.s (3.0 KB)

  1. // +build gc
  2. // +build !noasm
  3. #include "textflag.h"
  4. // Register allocation.
  5. #define dst R0
  6. #define dstorig R1
  7. #define src R2
  8. #define dstend R3
  9. #define srcend R4
  10. #define match R5 // Match address.
  11. #define token R6
  12. #define len R7 // Literal and match lengths.
  13. #define offset R5 // Match offset; overlaps with match.
  14. #define tmp1 R8
  15. #define tmp2 R9
  16. #define tmp3 R12
  17. #define minMatch $4
  18. // func decodeBlock(dst, src []byte) int
  19. TEXT ·decodeBlock(SB), NOFRAME|NOSPLIT, $-4-28
  20. MOVW dst_ptr+0(FP), dst
  21. MOVW dst_len+4(FP), dstend
  22. MOVW src_ptr+12(FP), src
  23. MOVW src_len+16(FP), srcend
  24. CMP $0, srcend
  25. BEQ shortSrc
  26. ADD dst, dstend
  27. ADD src, srcend
  28. MOVW dst, dstorig
  29. loop:
  30. CMP src, srcend
  31. BEQ end
  32. // Read token. Extract literal length.
  33. MOVBU.P 1(src), token
  34. MOVW token >> 4, len
  35. CMP $15, len
  36. BNE readLitlenDone
  37. readLitlenLoop:
  38. CMP src, srcend
  39. BEQ shortSrc
  40. MOVBU.P 1(src), tmp1
  41. ADD tmp1, len
  42. CMP $255, tmp1
  43. BEQ readLitlenLoop
  44. readLitlenDone:
  45. CMP $0, len
  46. BEQ copyLiteralDone
  47. // Bounds check dst+len and src+len.
  48. ADD dst, len, tmp1
  49. ADD src, len, tmp2
  50. CMP dstend, tmp1
  51. BHI shortDst
  52. CMP srcend, tmp2
  53. BHI shortSrc
  54. // Copy literal.
  55. CMP $4, len
  56. BLO copyLiteralFinish
  57. // Copy 0-3 bytes until src is aligned.
  58. TST $1, src
  59. MOVBU.NE.P 1(src), tmp1
  60. MOVB.NE.P tmp1, 1(dst)
  61. SUB.NE $1, len
  62. TST $2, src
  63. MOVHU.NE.P 2(src), tmp2
  64. MOVB.NE.P tmp2, 1(dst)
  65. MOVW.NE tmp2 >> 8, tmp1
  66. MOVB.NE.P tmp1, 1(dst)
  67. SUB.NE $2, len
  68. CMP $4, len
  69. BLO copyLiteralFinish
  70. copyLiteralLoop:
  71. // Aligned load, unaligned write.
  72. SUB $4, len
  73. MOVW.P 4(src), tmp1
  74. MOVW tmp1 >> 8, tmp2
  75. MOVB tmp2, 1(dst)
  76. MOVW tmp1 >> 16, tmp3
  77. MOVB tmp3, 2(dst)
  78. MOVW tmp1 >> 24, tmp2
  79. MOVB tmp2, 3(dst)
  80. MOVB.P tmp1, 4(dst)
  81. CMP $4, len
  82. BHS copyLiteralLoop
  83. copyLiteralFinish:
  84. // Copy remaining 0-3 bytes.
  85. TST $2, len
  86. MOVHU.NE.P 2(src), tmp2
  87. MOVB.NE.P tmp2, 1(dst)
  88. MOVW.NE tmp2 >> 8, tmp1
  89. MOVB.NE.P tmp1, 1(dst)
  90. TST $1, len
  91. MOVBU.NE.P 1(src), tmp1
  92. MOVB.NE.P tmp1, 1(dst)
  93. copyLiteralDone:
  94. CMP src, srcend
  95. BEQ end
  96. // Read offset.
  97. ADD $2, src
  98. CMP srcend, src
  99. BHI shortSrc
  100. MOVBU -2(src), offset
  101. MOVBU -1(src), tmp1
  102. ORR tmp1 << 8, offset
  103. CMP $0, offset
  104. BEQ corrupt
  105. // Read match length.
  106. AND $15, token, len
  107. CMP $15, len
  108. BNE readMatchlenDone
  109. readMatchlenLoop:
  110. CMP src, srcend
  111. BEQ shortSrc
  112. MOVBU.P 1(src), tmp1
  113. ADD tmp1, len
  114. CMP $255, tmp1
  115. BEQ readMatchlenLoop
  116. readMatchlenDone:
  117. ADD minMatch, len
  118. ADD dst, len, tmp1
  119. CMP dstend, tmp1
  120. BHI shortDst
  121. SUB offset, dst, match
  122. CMP dstorig, match
  123. BLO corrupt
  124. copyMatch:
  125. // Simple byte-at-a-time copy.
  126. SUB.S $1, len
  127. MOVBU.P 1(match), tmp2
  128. MOVB.P tmp2, 1(dst)
  129. BNE copyMatch
  130. B loop
  131. // The three error cases have distinct labels so we can put different
  132. // return codes here when debugging, or if the error returns need to
  133. // be changed.
  134. shortDst:
  135. shortSrc:
  136. corrupt:
  137. MOVW $-1, tmp1
  138. MOVW tmp1, ret+24(FP)
  139. RET
  140. end:
  141. SUB dstorig, dst, tmp1
  142. MOVW tmp1, ret+24(FP)
  143. RET