// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // +build !appengine // +build gc // +build !noasm #include "textflag.h" // The asm code generally follows the pure Go code in encode_other.go, except // where marked with a "!!!". // ---------------------------------------------------------------------------- // func emitLiteral(dst, lit []byte) int // // All local variables fit into registers. The register allocation: // - AX return value // - BX n // - CX len(lit) // - SI &lit[0] // - DI &dst[i] // // The 24 bytes of stack space is to call runtime·memmove. TEXT ·emitLiteral(SB), NOSPLIT, $24-56 MOVQ dst_base+0(FP), DI MOVQ lit_base+24(FP), SI MOVQ lit_len+32(FP), CX MOVQ CX, AX MOVL CX, BX SUBL $1, BX CMPL BX, $60 JLT oneByte CMPL BX, $256 JLT twoBytes threeBytes: MOVB $0xf4, 0(DI) MOVW BX, 1(DI) ADDQ $3, DI ADDQ $3, AX JMP end twoBytes: MOVB $0xf0, 0(DI) MOVB BX, 1(DI) ADDQ $2, DI ADDQ $2, AX JMP end oneByte: SHLB $2, BX MOVB BX, 0(DI) ADDQ $1, DI ADDQ $1, AX end: MOVQ AX, ret+48(FP) // copy(dst[i:], lit) // // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push // DI, SI and CX as arguments. MOVQ DI, 0(SP) MOVQ SI, 8(SP) MOVQ CX, 16(SP) CALL runtime·memmove(SB) RET // ---------------------------------------------------------------------------- // func emitCopy(dst []byte, offset, length int) int // // All local variables fit into registers. The register allocation: // - BX offset // - CX length // - SI &dst[0] // - DI &dst[i] TEXT ·emitCopy(SB), NOSPLIT, $0-48 MOVQ dst_base+0(FP), DI MOVQ DI, SI MOVQ offset+24(FP), BX MOVQ length+32(FP), CX loop0: // for length >= 68 { etc } CMPL CX, $68 JLT step1 // Emit a length 64 copy, encoded as 3 bytes. MOVB $0xfe, 0(DI) MOVW BX, 1(DI) ADDQ $3, DI SUBL $64, CX JMP loop0 step1: // if length > 64 { etc } CMPL CX, $64 JLE step2 // Emit a length 60 copy, encoded as 3 bytes. MOVB $0xee, 0(DI) MOVW BX, 1(DI) ADDQ $3, DI SUBL $60, CX step2: // if length >= 12 || offset >= 2048 { goto step3 } CMPL CX, $12 JGE step3 CMPL BX, $2048 JGE step3 // Emit the remaining copy, encoded as 2 bytes. MOVB BX, 1(DI) SHRL $8, BX SHLB $5, BX SUBB $4, CX SHLB $2, CX ORB CX, BX ORB $1, BX MOVB BX, 0(DI) ADDQ $2, DI // Return the number of bytes written. SUBQ SI, DI MOVQ DI, ret+40(FP) RET step3: // Emit the remaining copy, encoded as 3 bytes. SUBL $1, CX SHLB $2, CX ORB $2, CX MOVB CX, 0(DI) MOVW BX, 1(DI) ADDQ $3, DI // Return the number of bytes written. SUBQ SI, DI MOVQ DI, ret+40(FP) RET // ---------------------------------------------------------------------------- // func extendMatch(src []byte, i, j int) int // // All local variables fit into registers. The register allocation: // - CX &src[0] // - DX &src[len(src)] // - SI &src[i] // - DI &src[j] // - R9 &src[len(src) - 8] TEXT ·extendMatch(SB), NOSPLIT, $0-48 MOVQ src_base+0(FP), CX MOVQ src_len+8(FP), DX MOVQ i+24(FP), SI MOVQ j+32(FP), DI ADDQ CX, DX ADDQ CX, SI ADDQ CX, DI MOVQ DX, R9 SUBQ $8, R9 cmp8: // As long as we are 8 or more bytes before the end of src, we can load and // compare 8 bytes at a time. If those 8 bytes are equal, repeat. CMPQ DI, R9 JA cmp1 MOVQ (SI), AX MOVQ (DI), BX CMPQ AX, BX JNE bsf ADDQ $8, SI ADDQ $8, DI JMP cmp8 bsf: // If those 8 bytes were not equal, XOR the two 8 byte values, and return // the index of the first byte that differs. The BSF instruction finds the // least significant 1 bit, the amd64 architecture is little-endian, and // the shift by 3 converts a bit index to a byte index. XORQ AX, BX BSFQ BX, BX SHRQ $3, BX ADDQ BX, DI // Convert from &src[ret] to ret. SUBQ CX, DI MOVQ DI, ret+40(FP) RET cmp1: // In src's tail, compare 1 byte at a time. CMPQ DI, DX JAE end MOVB (SI), AX MOVB (DI), BX CMPB AX, BX JNE end ADDQ $1, SI ADDQ $1, DI JMP cmp1 end: // Convert from &src[ret] to ret. SUBQ CX, DI MOVQ DI, ret+40(FP) RET