| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308 |
- // Copyright 2016 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- #include "textflag.h"
- // func decode(dst, src []byte) int
- //
- // The asm code generally follows the pure Go code in decode_other.go, except
- // where marked with a "!!!".
- //
- // All local variables fit into registers. The non-zero stack size is only to
- // spill registers and push args when issuing a CALL. The register allocation:
- // - AX scratch
- // - BX scratch
- // - CX length or x
- // - DX offset
- // - SI &src[s]
- // - DI &dst[d]
- // + R8 dst_base
- // + R9 dst_len
- // + R10 dst_base + dst_len
- // + R11 src_base
- // + R12 src_len
- // + R13 src_base + src_len
- // - R14 unused
- // - R15 used by doCopy
- //
- // The registers R8-R13 (marked with a "+") are set at the start of the
- // function, and after a CALL returns, and are not otherwise modified.
- //
- // The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI.
- // The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
- TEXT ·decode(SB), NOSPLIT, $48-56
- // Initialize SI, DI and R8-R13.
- MOVQ dst_base+0(FP), R8
- MOVQ dst_len+8(FP), R9
- MOVQ R8, DI
- MOVQ R8, R10
- ADDQ R9, R10
- MOVQ src_base+24(FP), R11
- MOVQ src_len+32(FP), R12
- MOVQ R11, SI
- MOVQ R11, R13
- ADDQ R12, R13
- loop:
- // for s < len(src)
- CMPQ SI, R13
- JEQ end
- // CX = uint32(src[s])
- //
- // switch src[s] & 0x03
- MOVBLZX (SI), CX
- MOVL CX, BX
- ANDL $3, BX
- CMPL BX, $1
- JAE tagCopy
- // ----------------------------------------
- // The code below handles literal tags.
- // case tagLiteral:
- // x := uint32(src[s] >> 2)
- // switch
- SHRL $2, CX
- CMPL CX, $60
- JAE tagLit60Plus
- // case x < 60:
- // s++
- INCQ SI
- doLit:
- // This is the end of the inner "switch", when we have a literal tag.
- //
- // We assume that CX == x and x fits in a uint32, where x is the variable
- // used in the pure Go decode_other.go code.
- // length = int(x) + 1
- //
- // Unlike the pure Go code, we don't need to check if length <= 0 because
- // CX can hold 64 bits, so the increment cannot overflow.
- INCQ CX
- // Prepare to check if copying length bytes will run past the end of dst or
- // src.
- //
- // AX = len(dst) - d
- // BX = len(src) - s
- MOVQ R10, AX
- SUBQ DI, AX
- MOVQ R13, BX
- SUBQ SI, BX
- // if length > len(dst)-d || length > len(src)-s { etc }
- CMPQ CX, AX
- JGT errCorrupt
- CMPQ CX, BX
- JGT errCorrupt
- // copy(dst[d:], src[s:s+length])
- //
- // This means calling runtime·memmove(&dst[d], &src[s], length), so we push
- // DI, SI and CX as arguments. Coincidentally, we also need to spill those
- // three registers to the stack, to save local variables across the CALL.
- MOVQ DI, 0(SP)
- MOVQ SI, 8(SP)
- MOVQ CX, 16(SP)
- MOVQ DI, 24(SP)
- MOVQ SI, 32(SP)
- MOVQ CX, 40(SP)
- CALL runtime·memmove(SB)
- // Restore local variables: unspill registers from the stack and
- // re-calculate R8-R13.
- MOVQ 24(SP), DI
- MOVQ 32(SP), SI
- MOVQ 40(SP), CX
- MOVQ dst_base+0(FP), R8
- MOVQ dst_len+8(FP), R9
- MOVQ R8, R10
- ADDQ R9, R10
- MOVQ src_base+24(FP), R11
- MOVQ src_len+32(FP), R12
- MOVQ R11, R13
- ADDQ R12, R13
- // d += length
- // s += length
- ADDQ CX, DI
- ADDQ CX, SI
- JMP loop
- tagLit60Plus:
- // !!! This fragment does the
- //
- // s += x - 58; if uint(s) > uint(len(src)) { etc }
- //
- // checks. In the asm version, we code it once instead of once per switch case.
- ADDQ CX, SI
- SUBQ $58, SI
- MOVQ SI, BX
- SUBQ R11, BX
- CMPQ BX, R12
- JA errCorrupt
- // case x == 60:
- CMPL CX, $61
- JEQ tagLit61
- JA tagLit62Plus
- // x = uint32(src[s-1])
- MOVBLZX -1(SI), CX
- JMP doLit
- tagLit61:
- // case x == 61:
- // x = uint32(src[s-2]) | uint32(src[s-1])<<8
- MOVWLZX -2(SI), CX
- JMP doLit
- tagLit62Plus:
- CMPL CX, $62
- JA tagLit63
- // case x == 62:
- // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
- MOVWLZX -3(SI), CX
- MOVBLZX -1(SI), BX
- SHLL $16, BX
- ORL BX, CX
- JMP doLit
- tagLit63:
- // case x == 63:
- // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
- MOVL -4(SI), CX
- JMP doLit
- // The code above handles literal tags.
- // ----------------------------------------
- // The code below handles copy tags.
- tagCopy2:
- // case tagCopy2:
- // s += 3
- ADDQ $3, SI
- // if uint(s) > uint(len(src)) { etc }
- MOVQ SI, BX
- SUBQ R11, BX
- CMPQ BX, R12
- JA errCorrupt
- // length = 1 + int(src[s-3])>>2
- SHRQ $2, CX
- INCQ CX
- // offset = int(src[s-2]) | int(src[s-1])<<8
- MOVWQZX -2(SI), DX
- JMP doCopy
- tagCopy:
- // We have a copy tag. We assume that:
- // - BX == src[s] & 0x03
- // - CX == src[s]
- CMPQ BX, $2
- JEQ tagCopy2
- JA errUC4T
- // case tagCopy1:
- // s += 2
- ADDQ $2, SI
- // if uint(s) > uint(len(src)) { etc }
- MOVQ SI, BX
- SUBQ R11, BX
- CMPQ BX, R12
- JA errCorrupt
- // offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
- MOVQ CX, DX
- ANDQ $0xe0, DX
- SHLQ $3, DX
- MOVBQZX -1(SI), BX
- ORQ BX, DX
- // length = 4 + int(src[s-2])>>2&0x7
- SHRQ $2, CX
- ANDQ $7, CX
- ADDQ $4, CX
- doCopy:
- // This is the end of the outer "switch", when we have a copy tag.
- //
- // We assume that:
- // - CX == length && CX > 0
- // - DX == offset
- // if offset <= 0 { etc }
- CMPQ DX, $0
- JLE errCorrupt
- // if d < offset { etc }
- MOVQ DI, BX
- SUBQ R8, BX
- CMPQ BX, DX
- JLT errCorrupt
- // if length > len(dst)-d { etc }
- MOVQ R10, BX
- SUBQ DI, BX
- CMPQ CX, BX
- JGT errCorrupt
- // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
- //
- // Set:
- // - R15 = &dst[d-offset]
- MOVQ DI, R15
- SUBQ DX, R15
- verySlowForwardCopy:
- // verySlowForwardCopy is a simple implementation of forward copy. In C
- // parlance, this is a do/while loop instead of a while loop, since we know
- // that length > 0. In Go syntax:
- //
- // for {
- // dst[d] = dst[d - offset]
- // d++
- // length--
- // if length == 0 {
- // break
- // }
- // }
- MOVB (R15), BX
- MOVB BX, (DI)
- INCQ R15
- INCQ DI
- DECQ CX
- JNZ verySlowForwardCopy
- JMP loop
- // The code above handles copy tags.
- // ----------------------------------------
- end:
- // This is the end of the "for s < len(src)".
- //
- // if d != len(dst) { etc }
- CMPQ DI, R10
- JNE errCorrupt
- // return 0
- MOVQ $0, ret+48(FP)
- RET
- errCorrupt:
- // return decodeErrCodeCorrupt
- MOVQ $1, ret+48(FP)
- RET
- errUC4T:
- // return decodeErrCodeUnsupportedCopy4Tag
- MOVQ $3, ret+48(FP)
- RET
|