// Code generated by command: go run gen.go -out encodeblock_amd64.s -stubs encodeblock_amd64.go. DO NOT EDIT.

// +build !appengine
// +build !noasm
// +build gc

#include "textflag.h"

// encodeBlockAsm encodes src into dst and returns the number of bytes written
// to dst. It returns 0 when one of the destination-space checks below fails.
//
// Stack frame (65560 bytes of locals):
//    0(SP)  8 bytes  dst limit: dst_base + src_len - 5 - (src_len >> 5)
//    8(SP)  4 bytes  src_len - 8: last position at which the search loop runs
//   12(SP)  4 bytes  src index of the first byte not yet emitted
//   16(SP)  4 bytes  offset of the most recent match ("repeat" offset), starts at 1
//   20(SP)  4 bytes  src index to resume searching at when no candidate matches
//   24(SP)  64 KiB   table of 16384 uint32 src indices, indexed by a 14-bit hash
//
// Register roles in the main loop:
//   AX = dst write cursor, DX = src base, CX = current src index,
//   SI = 8 source bytes loaded at the current index.
//
// NOTE(review): the tag-byte values written below (0xf0/0xf4/0xf8/0xfc,
// 0x15/0x19/0x1d, 0xee, 0xff, ...) presumably follow the block format
// implemented by the Go code that gen.go mirrors - confirm against the generator.

// func encodeBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBlockAsm(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	// Zero the hash table: 0x200 iterations of 0x80 bytes = 64 KiB at 24(SP).
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsm:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm
	// Nothing emitted yet.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	// 0(SP) = dst_base + (src_len - 5) - (src_len >> 5).
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	// Start at src index 1 with a repeat offset of 1.
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

	// Main search loop: look for a repeat or a hash-table candidate at CX.
search_loop_encodeBlockAsm:
	MOVQ (DX)(CX*1), SI
	// BP = CX + 4 + ((CX - 12(SP)) >> 6); leave the loop once it exceeds 8(SP).
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x06, BP
	LEAL 4(CX)(BP*1), BP
	MOVL 8(SP), DI
	CMPL BP, DI
	JGT emit_remainder_encodeBlockAsm
	MOVL BP, 20(SP)
	// hash(x) = ((x << 16) * 0xcf1bbcdcbf9b) >> 50, i.e. a 14-bit table index.
	// R9 = hash(SI), R10 = hash(SI >> 8).
	MOVQ $0x0000cf1bbcdcbf9b, R8
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10
	SHLQ $0x10, R9
	IMULQ R8, R9
	SHRQ $0x32, R9
	SHLQ $0x10, R10
	IMULQ R8, R10
	SHRQ $0x32, R10
	// Load both candidates (BP, DI), then store CX and CX+1 in their slots.
	MOVL 24(SP)(R9*4), BP
	MOVL 24(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4)
	// R9 = hash(SI >> 16), used by the third candidate check.
	MOVQ SI, R9
	SHRQ $0x10, R9
	SHLQ $0x10, R9
	IMULQ R8, R9
	SHRQ $0x32, R9
	// Repeat check: compare 4 bytes at src[CX+1-16(SP)] with the 4 bytes at src[CX+1].
	MOVL CX, R8
	SUBL 16(SP), R8
	MOVL 1(DX)(R8*1), R10
	MOVQ SI, R8
	SHRQ $0x08, R8
	CMPL R8, R10
	JNE no_repeat_found_encodeBlockAsm
	// Repeat found starting at SI = CX+1; BP = matching position SI - 16(SP).
	LEAL 1(CX), SI
	MOVL 12(SP), DI
	MOVL SI, BP
	SUBL 16(SP), BP
	JZ repeat_extend_back_end_encodeBlockAsm

	// Extend the repeat backwards while SI > 12(SP), BP > 0 and the preceding bytes match.
repeat_extend_back_loop_encodeBlockAsm:
	CMPL SI, DI
	JLE repeat_extend_back_end_encodeBlockAsm
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE repeat_extend_back_end_encodeBlockAsm
	LEAL -1(SI), SI
	DECL BP
	JNZ repeat_extend_back_loop_encodeBlockAsm

	// Emit src[12(SP):SI] as a literal run: R8 = length, R9 = source pointer, BP = length-1.
repeat_extend_back_end_encodeBlockAsm:
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	MOVL R8, BP
	SUBL $0x01, BP
	JC emit_literal_done_repeat_emit_encodeBlockAsm
	// Literal header size is chosen by length-1: <60, <1<<8, <1<<16, <1<<24, else 4-byte length.
	CMPL BP, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm
	CMPL BP, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm
	CMPL BP, $0x00010000
	JLT three_bytes_repeat_emit_encodeBlockAsm
	CMPL BP, $0x01000000
	JLT four_bytes_repeat_emit_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_repeat_emit_encodeBlockAsm

four_bytes_repeat_emit_encodeBlockAsm:
	MOVL BP, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_repeat_emit_encodeBlockAsm

three_bytes_repeat_emit_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_repeat_emit_encodeBlockAsm

two_bytes_repeat_emit_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	JMP memmove_repeat_emit_encodeBlockAsm

one_byte_repeat_emit_encodeBlockAsm:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

	// Copy R8 bytes from (R9) to (AX). BP = AX + R8 is the dst cursor after the copy.
	// Each size class loads everything (overlapping head/tail) before storing.
memmove_repeat_emit_encodeBlockAsm:
	LEAQ (AX)(R8*1), BP
	NOP

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail:
	TESTQ R8, R8
	JEQ memmove_end_copy_repeat_emit_encodeBlockAsm
	CMPQ R8, $0x02
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2
	CMPQ R8, $0x04
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4
	CMPQ R8, $0x08
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7
	JE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
	CMPQ R8, $0x40
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
	CMPQ R8, $0x80
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128
	CMPQ R8, $0x00000100
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4:
	MOVL (R9), R10
	MOVL R10, (AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
	MOVQ (R9), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU 32(R9), X2
	MOVOU 48(R9), X3
	MOVOU -64(R9)(R8*1), X12
	MOVOU -48(R9)(R8*1), X13
	MOVOU -32(R9)(R8*1), X14
	MOVOU -16(R9)(R8*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(R8*1)
	MOVOU X13, -48(AX)(R8*1)
	MOVOU X14, -32(AX)(R8*1)
	MOVOU X15, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU 32(R9), X2
	MOVOU 48(R9), X3
	MOVOU 64(R9), X4
	MOVOU 80(R9), X5
	MOVOU 96(R9), X6
	MOVOU 112(R9), X7
	MOVOU -128(R9)(R8*1), X8
	MOVOU -112(R9)(R8*1), X9
	MOVOU -96(R9)(R8*1), X10
	MOVOU -80(R9)(R8*1), X11
	MOVOU -64(R9)(R8*1), X12
	MOVOU -48(R9)(R8*1), X13
	MOVOU -32(R9)(R8*1), X14
	MOVOU -16(R9)(R8*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(R8*1)
	MOVOU X9, -112(AX)(R8*1)
	MOVOU X10, -96(AX)(R8*1)
	MOVOU X11, -80(AX)(R8*1)
	MOVOU X12, -64(AX)(R8*1)
	MOVOU X13, -48(AX)(R8*1)
	MOVOU X14, -32(AX)(R8*1)
	MOVOU X15, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

	// Copy 256 bytes per iteration while at least 256 remain, then re-dispatch on the rest.
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048:
	LEAQ -256(R8), R8
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU 32(R9), X2
	MOVOU 48(R9), X3
	MOVOU 64(R9), X4
	MOVOU 80(R9), X5
	MOVOU 96(R9), X6
	MOVOU 112(R9), X7
	MOVOU 128(R9), X8
	MOVOU 144(R9), X9
	MOVOU 160(R9), X10
	MOVOU 176(R9), X11
	MOVOU 192(R9), X12
	MOVOU 208(R9), X13
	MOVOU 224(R9), X14
	MOVOU 240(R9), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	// The LEAQs sit between CMPQ and JGE; the branch uses the flags from the CMPQ.
	CMPQ R8, $0x00000100
	LEAQ 256(R9), R9
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail

memmove_end_copy_repeat_emit_encodeBlockAsm:
	MOVQ BP, AX

	// Extend the repeat forwards from CX+5: R9 = &src[CX], BP = &src[CX - 16(SP)],
	// R8 = bytes left in src, R11 = number of additional matching bytes.
emit_literal_done_repeat_emit_encodeBlockAsm:
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(BP*1), BP
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend

	// Compare 8 bytes at a time; on a difference, BSFQ / 8 gives the count of equal leading bytes.
matchlen_loopback_repeat_extend:
	MOVQ (R9)(R11*1), R10
	XORQ (BP)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeBlockAsm

matchlen_loop_repeat_extend:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend

	// Fewer than 8 bytes left: compare one byte at a time.
matchlen_single_repeat_extend:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeBlockAsm

matchlen_single_loopback_repeat_extend:
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE repeat_extend_forward_end_encodeBlockAsm
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend

	// BP = total match length (CX - start SI), SI = offset from 16(SP).
	// DI still holds the 12(SP) value read before the literal emit; when it is 0 the
	// match is emitted through the copy path instead of the repeat path.
repeat_extend_forward_end_encodeBlockAsm:
	ADDL R11, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	TESTL DI, DI
	JZ repeat_as_copy_encodeBlockAsm

	// Emit a repeat of length BP (DI keeps the original length, BP becomes length-4).
emit_repeat_again_match_repeat_encodeBlockAsm:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_repeat_encodeBlockAsm

cant_repeat_two_offset_match_repeat_encodeBlockAsm:
	CMPL BP, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm
	CMPL BP, $0x00010100
	JLT repeat_four_match_repeat_encodeBlockAsm
	CMPL BP, $0x0100ffff
	JLT repeat_five_match_repeat_encodeBlockAsm
	// Too long for one 5-byte repeat: write one with length bytes fb ff ff and loop on the rest.
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_repeat_encodeBlockAsm

repeat_five_match_repeat_encodeBlockAsm:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_match_repeat_encodeBlockAsm:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_match_repeat_encodeBlockAsm:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_match_repeat_encodeBlockAsm:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// Two-byte form carrying the offset: byte0 = (offset>>8)<<5 | BP<<2 | 1, byte1 = low offset byte.
repeat_two_offset_match_repeat_encodeBlockAsm:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// Emit the match as a copy: offset SI, length BP. Offsets >= 65536 use a 4-byte offset.
repeat_as_copy_encodeBlockAsm:
	CMPL SI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeBlockAsm

four_bytes_loop_back_repeat_as_copy_encodeBlockAsm:
	CMPL BP, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm
	// Length > 64: write tag 0xff + 4-byte offset, subtract 64, emit the rest as a repeat.
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeBlockAsm

emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BP, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BP, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm
	// NOTE(review): unreachable - directly follows an unconditional JMP with no label in between.
	JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm

	// Remaining length BP with a 4-byte offset: tag = (BP-1)<<2 | 3, then the offset.
	// Only BL is set; the upper bits of BX affect only result bits that MOVB does not store.
four_bytes_remain_repeat_as_copy_encodeBlockAsm:
	TESTL BP, BP
	JZ repeat_end_emit_encodeBlockAsm
	MOVB $0x03, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

	// Offset fits in two bytes.
two_byte_offset_repeat_as_copy_encodeBlockAsm:
	CMPL BP, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
	// Length > 64: write tag 0xee + 2-byte offset, subtract 60, emit the rest as a repeat.
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX

emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm
	// NOTE(review): unreachable - directly follows an unconditional JMP with no label in between.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm

	// Length <= 64: use the 2-byte form when length < 12 and offset < 2048, else the 3-byte form.
two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
	CMPL BP, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
	CMPL SI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
	// byte0 = (offset>>8)<<5 | (length-4)<<2 | 1, byte1 = low offset byte.
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// byte0 = (length-1)<<2 | 2, followed by the 2-byte offset.
emit_copy_three_repeat_as_copy_encodeBlockAsm:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

	// Everything up to CX has been emitted; continue searching unless CX reached 8(SP).
repeat_end_emit_encodeBlockAsm:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm
	JMP search_loop_encodeBlockAsm

	// No repeat: test the table candidates for positions CX, CX+1 and CX+2 in turn.
no_repeat_found_encodeBlockAsm:
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBlockAsm
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ candidate2_match_encodeBlockAsm
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ candidate3_match_encodeBlockAsm
	// Nothing matched: jump ahead to the position saved in 20(SP).
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm

candidate3_match_encodeBlockAsm:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm

candidate2_match_encodeBlockAsm:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

	// Match at CX against candidate BP. Extend backwards while CX > 12(SP), BP > 0
	// and the preceding bytes are equal.
candidate_match_encodeBlockAsm:
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeBlockAsm

match_extend_back_loop_encodeBlockAsm:
	CMPL CX, SI
	JLE match_extend_back_end_encodeBlockAsm
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeBlockAsm
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeBlockAsm
	JMP match_extend_back_loop_encodeBlockAsm

	// Destination check: return 0 unless AX + pending literal length + 4 is below the limit in 0(SP).
match_extend_back_end_encodeBlockAsm:
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Emit src[12(SP):CX] as a literal run: R8 = length, SI = source pointer, DI = length-1.
match_dst_size_check_encodeBlockAsm:
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ emit_literal_done_match_emit_encodeBlockAsm
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	MOVL R8, DI
	SUBL $0x01, DI
	JC emit_literal_done_match_emit_encodeBlockAsm
	CMPL DI, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm
	CMPL DI, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm
	CMPL DI, $0x00010000
	JLT three_bytes_match_emit_encodeBlockAsm
	CMPL DI, $0x01000000
	JLT four_bytes_match_emit_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_match_emit_encodeBlockAsm

four_bytes_match_emit_encodeBlockAsm:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_match_emit_encodeBlockAsm

three_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_match_emit_encodeBlockAsm

two_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	JMP memmove_match_emit_encodeBlockAsm

one_byte_match_emit_encodeBlockAsm:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

	// Copy R8 bytes from (SI) to (AX). DI = AX + R8 is the dst cursor after the copy.
memmove_match_emit_encodeBlockAsm:
	LEAQ (AX)(R8*1), DI
	NOP

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail:
	TESTQ R8, R8
	JEQ memmove_end_copy_match_emit_encodeBlockAsm
	CMPQ R8, $0x02
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2
	CMPQ R8, $0x04
	JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7
	JE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
	CMPQ R8, $0x40
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
	CMPQ R8, $0x80
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128
	CMPQ R8, $0x00000100
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256
	JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4:
	MOVL (SI), R9
	MOVL R9, (AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
	MOVQ (SI), R9
	MOVQ R9, (AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU -64(SI)(R8*1), X12
	MOVOU -48(SI)(R8*1), X13
	MOVOU -32(SI)(R8*1), X14
	MOVOU -16(SI)(R8*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(R8*1)
	MOVOU X13, -48(AX)(R8*1)
	MOVOU X14, -32(AX)(R8*1)
	MOVOU X15, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	MOVOU -128(SI)(R8*1), X8
	MOVOU -112(SI)(R8*1), X9
	MOVOU -96(SI)(R8*1), X10
	MOVOU -80(SI)(R8*1), X11
	MOVOU -64(SI)(R8*1), X12
	MOVOU -48(SI)(R8*1), X13
	MOVOU -32(SI)(R8*1), X14
	MOVOU -16(SI)(R8*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(R8*1)
	MOVOU X9, -112(AX)(R8*1)
	MOVOU X10, -96(AX)(R8*1)
	MOVOU X11, -80(AX)(R8*1)
	MOVOU X12, -64(AX)(R8*1)
	MOVOU X13, -48(AX)(R8*1)
	MOVOU X14, -32(AX)(R8*1)
	MOVOU X15, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

	// Copy 256 bytes per iteration while at least 256 remain, then re-dispatch on the rest.
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048:
	LEAQ -256(R8), R8
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	MOVOU 128(SI), X8
	MOVOU 144(SI), X9
	MOVOU 160(SI), X10
	MOVOU 176(SI), X11
	MOVOU 192(SI), X12
	MOVOU 208(SI), X13
	MOVOU 224(SI), X14
	MOVOU 240(SI), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ R8, $0x00000100
	LEAQ 256(SI), SI
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048
	JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail

memmove_end_copy_match_emit_encodeBlockAsm:
	MOVQ DI, AX

emit_literal_done_match_emit_encodeBlockAsm:
	// Record the new offset (CX - BP) in 16(SP), skip the 4 bytes already compared,
	// then measure how much further the match runs: SI = bytes left, R9 = extra length.
match_nolit_loop_encodeBlockAsm:
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm

matchlen_loopback_match_nolit_encodeBlockAsm:
	MOVQ (DI)(R9*1), R8
	XORQ (BP)(R9*1), R8
	TESTQ R8, R8
	JZ matchlen_loop_match_nolit_encodeBlockAsm
	BSFQ R8, R8
	SARQ $0x03, R8
	LEAL (R9)(R8*1), R9
	JMP match_nolit_end_encodeBlockAsm

matchlen_loop_match_nolit_encodeBlockAsm:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm

matchlen_single_match_nolit_encodeBlockAsm:
	TESTL SI, SI
	JZ match_nolit_end_encodeBlockAsm

matchlen_single_loopback_match_nolit_encodeBlockAsm:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE match_nolit_end_encodeBlockAsm
	LEAL 1(R9), R9
	DECL SI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm

	// Emit the copy: BP = offset from 16(SP), R9 = total length (extension + 4).
match_nolit_end_encodeBlockAsm:
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL two_byte_offset_match_nolit_encodeBlockAsm

four_bytes_loop_back_match_nolit_encodeBlockAsm:
	CMPL R9, $0x40
	JLE four_bytes_remain_match_nolit_encodeBlockAsm
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL four_bytes_remain_match_nolit_encodeBlockAsm

emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
	CMPL R9, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
	CMPL R9, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy

repeat_five_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm
	// NOTE(review): unreachable - directly follows an unconditional JMP with no label in between.
	JMP four_bytes_loop_back_match_nolit_encodeBlockAsm

four_bytes_remain_match_nolit_encodeBlockAsm:
	TESTL R9, R9
	JZ match_nolit_emitcopy_end_encodeBlockAsm
	MOVB $0x03, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

two_byte_offset_match_nolit_encodeBlockAsm:
	CMPL R9, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX

emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R9, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R9, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short

repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm
	// NOTE(review): unreachable - directly follows an unconditional JMP with no label in between.
	JMP two_byte_offset_match_nolit_encodeBlockAsm

two_byte_offset_short_match_nolit_encodeBlockAsm:
	CMPL R9, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm
	CMPL BP, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

emit_copy_three_match_nolit_encodeBlockAsm:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

	// Copy emitted. Stop at the search limit, and return 0 if AX has reached the dst limit.
match_nolit_emitcopy_end_encodeBlockAsm:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Hash the bytes at CX-2 and at CX, store both positions in the table, and
	// check whether the previous entry for CX is an immediate match.
match_nolit_dst_ok_encodeBlockAsm:
	MOVQ -2(DX)(CX*1), SI
	MOVQ $0x0000cf1bbcdcbf9b, BP
	MOVQ SI, DI
	SHRQ $0x10, SI
	MOVQ SI, R8
	SHLQ $0x10, DI
	IMULQ BP, DI
	SHRQ $0x32, DI
	SHLQ $0x10, R8
	IMULQ BP, R8
	SHRQ $0x32, R8
	LEAL -2(CX), R9
	MOVL 24(SP)(R8*4), BP
	MOVL R9, 24(SP)(DI*4)
	MOVL CX, 24(SP)(R8*4)
	CMPL (DX)(BP*1), SI
	JEQ match_nolit_loop_encodeBlockAsm
	INCL CX
	JMP search_loop_encodeBlockAsm

	// Emit src[12(SP):src_len] as a final literal run, after checking that
	// AX + remaining length + 4 stays below the limit in 0(SP) (else return 0).
emit_remainder_encodeBlockAsm:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

	// From here on CX = source pointer, BP = length, DX = length-1 (DX no longer holds src base).
emit_remainder_ok_encodeBlockAsm:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	MOVL BP, DX
	SUBL $0x01, DX
	JC emit_literal_done_emit_remainder_encodeBlockAsm
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x01000000
	JLT four_bytes_emit_remainder_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_emit_remainder_encodeBlockAsm

four_bytes_emit_remainder_encodeBlockAsm:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_emit_remainder_encodeBlockAsm

three_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_emit_remainder_encodeBlockAsm

two_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	JMP memmove_emit_remainder_encodeBlockAsm

one_byte_emit_remainder_encodeBlockAsm:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

	// Copy BX bytes from (CX) to (AX). DX = AX + BP is the dst cursor after the copy.
memmove_emit_remainder_encodeBlockAsm:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	NOP

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail:
	TESTQ BX, BX
	JEQ memmove_end_copy_emit_remainder_encodeBlockAsm
	CMPQ BX, $0x02
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
	CMPQ BX, $0x04
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7
	JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
	CMPQ BX, $0x40
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
	CMPQ BX, $0x80
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128
	CMPQ BX, $0x00000100
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4:
	MOVL (CX), BP
	MOVL BP, (AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8:
	MOVQ (CX), BP
	MOVQ BP, (AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(BX*1), X12
	MOVOU -48(CX)(BX*1), X13
	MOVOU -32(CX)(BX*1), X14
	MOVOU -16(CX)(BX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(BX*1)
	MOVOU X13, -48(AX)(BX*1)
	MOVOU X14, -32(AX)(BX*1)
	MOVOU X15, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(BX*1), X8
	MOVOU -112(CX)(BX*1), X9
	MOVOU -96(CX)(BX*1), X10
	MOVOU -80(CX)(BX*1), X11
	MOVOU -64(CX)(BX*1), X12
	MOVOU -48(CX)(BX*1), X13
	MOVOU -32(CX)(BX*1), X14
	MOVOU -16(CX)(BX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(BX*1)
	MOVOU X9, -112(AX)(BX*1)
	MOVOU X10, -96(AX)(BX*1)
	MOVOU X11, -80(AX)(BX*1)
	MOVOU X12, -64(AX)(BX*1)
	MOVOU X13, -48(AX)(BX*1)
	MOVOU X14, -32(AX)(BX*1)
	MOVOU X15, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

	// Copy 256 bytes per iteration while at least 256 remain, then re-dispatch on the rest.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048:
	LEAQ -256(BX), BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU 128(CX), X8
	MOVOU 144(CX), X9
	MOVOU 160(CX), X10
	MOVOU 176(CX), X11
	MOVOU 192(CX), X12
	MOVOU 208(CX), X13
	MOVOU 224(CX), X14
	MOVOU 240(CX), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ BX, $0x00000100
	LEAQ 256(CX), CX
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail

memmove_end_copy_emit_remainder_encodeBlockAsm:
	MOVQ DX, AX

	// Return the number of bytes written: AX - dst_base.
emit_literal_done_emit_remainder_encodeBlockAsm:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm12B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBlockAsm12B(SB), $16408-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000080, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBlockAsm12B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBlockAsm12B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm12B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeBlockAsm12B MOVL BP, 20(SP) MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 SHLQ $0x18, R10 IMULQ R8, R10 SHRQ $0x34, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm12B LEAL 1(CX), SI MOVL 12(SP), DI MOVL SI, BP SUBL 16(SP), BP JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: CMPL SI, DI JLE repeat_extend_back_end_encodeBlockAsm12B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeBlockAsm12B
LEAL -1(SI), SI DECL BP JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 MOVL R8, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeBlockAsm12B CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm12B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm12B CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm12B CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm12B MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeBlockAsm12B four_bytes_repeat_emit_encodeBlockAsm12B: MOVL BP, R10 SHRL $0x10, R10 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeBlockAsm12B three_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm12B: LEAQ (AX)(R8*1), BP NOP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_repeat_emit_encodeBlockAsm12B CMPQ R8, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 CMPQ R8, $0x40 JBE 
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4: MOVL (R9), R10 MOVL R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: MOVQ (R9), R10 MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 
MOVOU 48(R9), X3 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU -128(R9)(R8*1), X8 MOVOU -112(R9)(R8*1), X9 MOVOU -96(R9)(R8*1), X10 MOVOU -80(R9)(R8*1), X11 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU 128(R9), X8 MOVOU 144(R9), X9 MOVOU 160(R9), X10 MOVOU 176(R9), X11 MOVOU 192(R9), X12 MOVOU 208(R9), X13 MOVOU 224(R9), X14 MOVOU 240(R9), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(R9), R9 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail 
memmove_end_copy_repeat_emit_encodeBlockAsm12B: MOVQ BP, AX emit_literal_done_repeat_emit_encodeBlockAsm12B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(BP*1), BP XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R9)(R11*1), R10 XORQ (BP)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_loop_repeat_extend: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL R8, R8 JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_single_loopback_repeat_extend: MOVB (R9)(R11*1), R10 CMPB (BP)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm12B LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm12B: ADDL R11, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm12B emit_repeat_again_match_repeat_encodeBlockAsm12B: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm12B CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm12B CMPL BP, $0x00010100 JLT repeat_four_match_repeat_encodeBlockAsm12B CMPL BP, $0x0100ffff JLT repeat_five_match_repeat_encodeBlockAsm12B LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_repeat_encodeBlockAsm12B repeat_five_match_repeat_encodeBlockAsm12B: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_four_match_repeat_encodeBlockAsm12B: 
LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm12B four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12B: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12B MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12B emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm12B 
repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12B four_bytes_remain_repeat_as_copy_encodeBlockAsm12B: TESTL BP, BP JZ repeat_end_emit_encodeBlockAsm12B MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm12B two_byte_offset_repeat_as_copy_encodeBlockAsm12B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP 
emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm12B: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm12B JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBlockAsm12B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm12B MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeBlockAsm12B MOVL 20(SP), CX JMP 
search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeBlockAsm12B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: CMPL CX, SI JLE match_extend_back_end_encodeBlockAsm12B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBlockAsm12B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm12B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm12B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeBlockAsm12B CMPL DI, $0x3c JLT one_byte_match_emit_encodeBlockAsm12B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm12B CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm12B CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm12B MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeBlockAsm12B four_bytes_match_emit_encodeBlockAsm12B: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeBlockAsm12B three_bytes_match_emit_encodeBlockAsm12B: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm12B: LEAQ (AX)(R8*1), DI NOP 
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeBlockAsm12B CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, 
-8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 
96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail memmove_end_copy_match_emit_encodeBlockAsm12B: MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm12B matchlen_loopback_match_nolit_encodeBlockAsm12B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm12B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm12B matchlen_loop_match_nolit_encodeBlockAsm12B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm12B matchlen_single_match_nolit_encodeBlockAsm12B: TESTL SI, SI JZ match_nolit_end_encodeBlockAsm12B matchlen_single_loopback_match_nolit_encodeBlockAsm12B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm12B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B match_nolit_end_encodeBlockAsm12B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm12B four_bytes_loop_back_match_nolit_encodeBlockAsm12B: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm12B MOVB $0xff, (AX) 
MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm12B emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy LEAL -16842747(R9), R9 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy repeat_five_match_nolit_encodeBlockAsm12B_emit_copy: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_four_match_nolit_encodeBlockAsm12B_emit_copy: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B JMP four_bytes_loop_back_match_nolit_encodeBlockAsm12B four_bytes_remain_match_nolit_encodeBlockAsm12B: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeBlockAsm12B MOVB $0x03, BL LEAL 
-4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B two_byte_offset_match_nolit_encodeBlockAsm12B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short LEAL -16842747(R9), R9 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 
MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B JMP two_byte_offset_match_nolit_encodeBlockAsm12B two_byte_offset_short_match_nolit_encodeBlockAsm12B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm12B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm12B: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm12B CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: MOVQ -2(DX)(CX*1), SI MOVQ $0x000000cf1bbcdcbb, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x18, DI IMULQ BP, DI SHRQ $0x34, DI SHLQ $0x18, R8 IMULQ BP, R8 SHRQ $0x34, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeBlockAsm12B INCL CX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm12B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm12B CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm12B CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm12B MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP 
memmove_emit_remainder_encodeBlockAsm12B four_bytes_emit_remainder_encodeBlockAsm12B: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeBlockAsm12B three_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm12B: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeBlockAsm12B CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B 
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 
MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail memmove_end_copy_emit_remainder_encodeBlockAsm12B: MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm12B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm10B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBlockAsm10B(SB), $4120-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000020, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBlockAsm10B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) 
MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBlockAsm10B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm10B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeBlockAsm10B MOVL BP, 20(SP) MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x36, R9 SHLQ $0x18, R10 IMULQ R8, R10 SHRQ $0x36, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x36, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm10B LEAL 1(CX), SI MOVL 12(SP), DI MOVL SI, BP SUBL 16(SP), BP JZ repeat_extend_back_end_encodeBlockAsm10B repeat_extend_back_loop_encodeBlockAsm10B: CMPL SI, DI JLE repeat_extend_back_end_encodeBlockAsm10B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeBlockAsm10B LEAL -1(SI), SI DECL BP JNZ repeat_extend_back_loop_encodeBlockAsm10B repeat_extend_back_end_encodeBlockAsm10B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 MOVL R8, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeBlockAsm10B CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm10B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm10B CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm10B CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm10B MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeBlockAsm10B four_bytes_repeat_emit_encodeBlockAsm10B: MOVL BP, R10 
SHRL $0x10, R10 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeBlockAsm10B three_bytes_repeat_emit_encodeBlockAsm10B: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeBlockAsm10B two_bytes_repeat_emit_encodeBlockAsm10B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeBlockAsm10B one_byte_repeat_emit_encodeBlockAsm10B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm10B: LEAQ (AX)(R8*1), BP NOP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_repeat_emit_encodeBlockAsm10B CMPQ R8, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_256through2048 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4: MOVL (R9), R10 MOVL R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3: MOVW (R9), R10 MOVB 
2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_5through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8: MOVQ (R9), R10 MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_9through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_65through128: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_129through256: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU -128(R9)(R8*1), X8 MOVOU -112(R9)(R8*1), X9 MOVOU -96(R9)(R8*1), X10 MOVOU -80(R9)(R8*1), X11 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) 
MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU 128(R9), X8 MOVOU 144(R9), X9 MOVOU 160(R9), X10 MOVOU 176(R9), X11 MOVOU 192(R9), X12 MOVOU 208(R9), X13 MOVOU 224(R9), X14 MOVOU 240(R9), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(R9), R9 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_tail memmove_end_copy_repeat_emit_encodeBlockAsm10B: MOVQ BP, AX emit_literal_done_repeat_emit_encodeBlockAsm10B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(BP*1), BP XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R9)(R11*1), R10 XORQ (BP)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_loop_repeat_extend: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL R8, R8 JZ repeat_extend_forward_end_encodeBlockAsm10B matchlen_single_loopback_repeat_extend: MOVB 
(R9)(R11*1), R10 CMPB (BP)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm10B LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm10B: ADDL R11, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm10B emit_repeat_again_match_repeat_encodeBlockAsm10B: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm10B CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm10B cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm10B CMPL BP, $0x00010100 JLT repeat_four_match_repeat_encodeBlockAsm10B CMPL BP, $0x0100ffff JLT repeat_five_match_repeat_encodeBlockAsm10B LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_repeat_encodeBlockAsm10B repeat_five_match_repeat_encodeBlockAsm10B: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_four_match_repeat_encodeBlockAsm10B: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_match_repeat_encodeBlockAsm10B: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_match_repeat_encodeBlockAsm10B: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_match_repeat_encodeBlockAsm10B: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_as_copy_encodeBlockAsm10B: CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm10B four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10B: CMPL 
BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm10B MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm10B emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10B 
four_bytes_remain_repeat_as_copy_encodeBlockAsm10B: TESTL BP, BP JZ repeat_end_emit_encodeBlockAsm10B MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm10B two_byte_offset_repeat_as_copy_encodeBlockAsm10B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy_short CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy_short LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B 
repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B emit_copy_three_repeat_as_copy_encodeBlockAsm10B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm10B: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm10B JMP search_loop_encodeBlockAsm10B no_repeat_found_encodeBlockAsm10B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBlockAsm10B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm10B MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeBlockAsm10B MOVL 20(SP), CX JMP search_loop_encodeBlockAsm10B candidate3_match_encodeBlockAsm10B: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm10B candidate2_match_encodeBlockAsm10B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeBlockAsm10B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBlockAsm10B match_extend_back_loop_encodeBlockAsm10B: CMPL CX, SI JLE match_extend_back_end_encodeBlockAsm10B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBlockAsm10B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBlockAsm10B JMP match_extend_back_loop_encodeBlockAsm10B match_extend_back_end_encodeBlockAsm10B: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm10B MOVQ 
$0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm10B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm10B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeBlockAsm10B CMPL DI, $0x3c JLT one_byte_match_emit_encodeBlockAsm10B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm10B CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm10B CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm10B MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeBlockAsm10B four_bytes_match_emit_encodeBlockAsm10B: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeBlockAsm10B three_bytes_match_emit_encodeBlockAsm10B: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeBlockAsm10B two_bytes_match_emit_encodeBlockAsm10B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeBlockAsm10B one_byte_match_emit_encodeBlockAsm10B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm10B: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeBlockAsm10B CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 CMPQ R8, $0x80 JBE 
emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_256through2048 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU 
X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_tail memmove_end_copy_match_emit_encodeBlockAsm10B: MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm10B: 
match_nolit_loop_encodeBlockAsm10B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm10B matchlen_loopback_match_nolit_encodeBlockAsm10B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm10B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm10B matchlen_loop_match_nolit_encodeBlockAsm10B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm10B matchlen_single_match_nolit_encodeBlockAsm10B: TESTL SI, SI JZ match_nolit_end_encodeBlockAsm10B matchlen_single_loopback_match_nolit_encodeBlockAsm10B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm10B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm10B match_nolit_end_encodeBlockAsm10B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm10B four_bytes_loop_back_match_nolit_encodeBlockAsm10B: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm10B MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm10B emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm10B_emit_copy CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm10B_emit_copy LEAL -16842747(R9), R9 MOVW $0x001d, 
(AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy repeat_five_match_nolit_encodeBlockAsm10B_emit_copy: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_four_match_nolit_encodeBlockAsm10B_emit_copy: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B JMP four_bytes_loop_back_match_nolit_encodeBlockAsm10B four_bytes_remain_match_nolit_encodeBlockAsm10B: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeBlockAsm10B MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B two_byte_offset_match_nolit_encodeBlockAsm10B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm10B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy_short: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: CMPL R9, $0x00000104 JLT 
repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm10B_emit_copy_short CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm10B_emit_copy_short LEAL -16842747(R9), R9 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy_short repeat_five_match_nolit_encodeBlockAsm10B_emit_copy_short: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_four_match_nolit_encodeBlockAsm10B_emit_copy_short: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B JMP two_byte_offset_match_nolit_encodeBlockAsm10B two_byte_offset_short_match_nolit_encodeBlockAsm10B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm10B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm10B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B emit_copy_three_match_nolit_encodeBlockAsm10B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm10B: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm10B CMPQ 
AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm10B: MOVQ -2(DX)(CX*1), SI MOVQ $0x000000cf1bbcdcbb, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x18, DI IMULQ BP, DI SHRQ $0x36, DI SHLQ $0x18, R8 IMULQ BP, R8 SHRQ $0x36, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeBlockAsm10B INCL CX JMP search_loop_encodeBlockAsm10B emit_remainder_encodeBlockAsm10B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm10B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm10B CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm10B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm10B CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm10B CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm10B MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeBlockAsm10B four_bytes_emit_remainder_encodeBlockAsm10B: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeBlockAsm10B three_bytes_emit_remainder_encodeBlockAsm10B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeBlockAsm10B two_bytes_emit_remainder_encodeBlockAsm10B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeBlockAsm10B one_byte_emit_remainder_encodeBlockAsm10B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm10B: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_tail: TESTQ BX, BX 
JEQ memmove_end_copy_emit_remainder_encodeBlockAsm10B CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_256through2048 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ 
BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 
48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU 128(CX), X8
	MOVOU 144(CX), X9
	MOVOU 160(CX), X10
	MOVOU 176(CX), X11
	MOVOU 192(CX), X12
	MOVOU 208(CX), X13
	MOVOU 224(CX), X14
	MOVOU 240(CX), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ BX, $0x00000100
	LEAQ 256(CX), CX
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_256through2048
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_tail

memmove_end_copy_emit_remainder_encodeBlockAsm10B:
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBlockAsm10B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst using a 256-entry table of 32-bit src offsets that
// lives on the stack, and returns the number of bytes written to dst.
// Returns 0 as soon as the output would reach the limit computed at entry.
//
// Stack frame ($1048 bytes of locals):
//    0(SP)  dst limit pointer: dst_base + (src_len - 5 - src_len>>5)
//    8(SP)  search limit: src_len - 8
//   12(SP)  src offset of the first byte not yet written to dst
//   16(SP)  distance of the most recent match; starts at 1
//   20(SP)  src offset at which the search resumes after a miss
//   24(SP)  table, 256 entries of 4 bytes
//
// Inside the main loop: AX = dst write pointer, CX = current src offset,
// DX = src base pointer. emit_remainder reuses CX and DX as scratch.
// NOTE(review): the tag bytes written below look like the Snappy/S2 block
// format (literal, copy and repeat tags) - confirm against the Go encoder.
TEXT ·encodeBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000008, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the table: 8 iterations of 128 bytes = 1024 bytes.
zero_loop_encodeBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm8B
	// Initialise the frame slots described in the header; CX starts at 1.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

	// SI = 8 bytes at src[CX]. The next position to try is
	// CX + 4 + (CX - 12(SP))>>4; leave the loop once it exceeds 8(SP).
search_loop_encodeBlockAsm8B:
	MOVQ (DX)(CX*1), SI
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x04, BP
	LEAL 4(CX)(BP*1), BP
	MOVL 8(SP), DI
	CMPL BP, DI
	JGT emit_remainder_encodeBlockAsm8B
	MOVL BP, 20(SP)
	// Table index = ((x << 32) * 0x9e3779b1) >> 56: only the low 4 bytes of
	// x contribute and the result is 8 bits. R9 indexes the bytes at CX,
	// R10 the bytes at CX+1.
	MOVQ $0x9e3779b1, R8
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x38, R9
	SHLQ $0x20, R10
	IMULQ R8, R10
	SHRQ $0x38, R10
	// BP, DI = previous table entries for CX and CX+1; then store the
	// current positions in their place.
	MOVL 24(SP)(R9*4), BP
	MOVL 24(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4)
	// R9 = table index for the bytes at CX+2 (used in no_repeat_found).
	MOVQ SI, R9
	SHRQ $0x10, R9
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x38, R9
	// Compare the 4 bytes at CX+1 with the 4 bytes 16(SP) earlier.
	MOVL CX, R8
	SUBL 16(SP), R8
	MOVL 1(DX)(R8*1), R10
	MOVQ SI, R8
	SHRQ $0x08, R8
	CMPL R8, R10
	JNE no_repeat_found_encodeBlockAsm8B
	// Matched at the stored distance. SI = CX+1 is the match start, BP the
	// matching earlier position, DI = 12(SP). Walk both backwards while the
	// preceding bytes are equal, SI > DI and BP > 0.
	LEAL 1(CX), SI
	MOVL 12(SP), DI
	MOVL SI, BP
	SUBL 16(SP), BP
	JZ repeat_extend_back_end_encodeBlockAsm8B

repeat_extend_back_loop_encodeBlockAsm8B:
	CMPL SI, DI
	JLE repeat_extend_back_end_encodeBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE repeat_extend_back_end_encodeBlockAsm8B
	LEAL -1(SI), SI
	DECL BP
	JNZ repeat_extend_back_loop_encodeBlockAsm8B

	// Emit src[12(SP):SI] as literals and set 12(SP) = SI.
	// R8 = length, R9 = source pointer, BP = length - 1. Below 60 the value
	// is stored in the tag byte itself (BP<<2); otherwise the tag byte is
	// 0xf0/0xf4/0xf8/0xfc followed by BP in 1/2/3/4 little-endian bytes.
repeat_extend_back_end_encodeBlockAsm8B:
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	MOVL R8, BP
	SUBL $0x01, BP
	JC emit_literal_done_repeat_emit_encodeBlockAsm8B
	CMPL BP, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm8B
	CMPL BP, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm8B
	CMPL BP, $0x00010000
	JLT three_bytes_repeat_emit_encodeBlockAsm8B
	CMPL BP, $0x01000000
	JLT four_bytes_repeat_emit_encodeBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_repeat_emit_encodeBlockAsm8B

four_bytes_repeat_emit_encodeBlockAsm8B:
	MOVL BP, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_repeat_emit_encodeBlockAsm8B

three_bytes_repeat_emit_encodeBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_repeat_emit_encodeBlockAsm8B

two_bytes_repeat_emit_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	JMP memmove_repeat_emit_encodeBlockAsm8B

one_byte_repeat_emit_encodeBlockAsm8B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

	// Copy R8 bytes from (R9) to (AX), dispatching on size. Sizes that are
	// not an exact width use one load from the start and one from the end,
	// which may overlap. BP = AX + R8 is the dst pointer after the copy.
memmove_repeat_emit_encodeBlockAsm8B:
	LEAQ (AX)(R8*1), BP
	NOP

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_tail:
	TESTQ R8, R8
	JEQ memmove_end_copy_repeat_emit_encodeBlockAsm8B
	CMPQ R8, $0x02
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2
	CMPQ R8, $0x04
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4
	CMPQ R8, $0x08
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_5through7
	JE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_9through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
	CMPQ R8, $0x40
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
	CMPQ R8, $0x80
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_65through128
	CMPQ R8, $0x00000100
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_129through256
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_256through2048

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4:
	MOVL (R9), R10
	MOVL R10, (AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_5through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
	MOVQ (R9), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_9through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_65through128:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU 32(R9), X2
	MOVOU 48(R9), X3
	MOVOU -64(R9)(R8*1), X12
	MOVOU -48(R9)(R8*1), X13
	MOVOU -32(R9)(R8*1), X14
	MOVOU -16(R9)(R8*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(R8*1)
	MOVOU X13, -48(AX)(R8*1)
	MOVOU X14, -32(AX)(R8*1)
	MOVOU X15, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_129through256:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU 32(R9), X2
	MOVOU 48(R9), X3
	MOVOU 64(R9), X4
	MOVOU 80(R9), X5
	MOVOU 96(R9), X6
	MOVOU 112(R9), X7
	MOVOU -128(R9)(R8*1), X8
	MOVOU -112(R9)(R8*1), X9
	MOVOU -96(R9)(R8*1), X10
	MOVOU -80(R9)(R8*1), X11
	MOVOU -64(R9)(R8*1), X12
	MOVOU -48(R9)(R8*1), X13
	MOVOU -32(R9)(R8*1), X14
	MOVOU -16(R9)(R8*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(R8*1)
	MOVOU X9, -112(AX)(R8*1)
	MOVOU X10, -96(AX)(R8*1)
	MOVOU X11, -80(AX)(R8*1)
	MOVOU X12, -64(AX)(R8*1)
	MOVOU X13, -48(AX)(R8*1)
	MOVOU X14, -32(AX)(R8*1)
	MOVOU X15, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

	// More than 256 bytes: move 256 per iteration, advancing both pointers,
	// then hand whatever is left back to the size dispatch above.
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_256through2048:
	LEAQ -256(R8), R8
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU 32(R9), X2
	MOVOU 48(R9), X3
	MOVOU 64(R9), X4
	MOVOU 80(R9), X5
	MOVOU 96(R9), X6
	MOVOU 112(R9), X7
	MOVOU 128(R9), X8
	MOVOU 144(R9), X9
	MOVOU 160(R9), X10
	MOVOU 176(R9), X11
	MOVOU 192(R9), X12
	MOVOU 208(R9), X13
	MOVOU 224(R9), X14
	MOVOU 240(R9), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ R8, $0x00000100
	LEAQ 256(R9), R9
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_256through2048
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_tail

memmove_end_copy_repeat_emit_encodeBlockAsm8B:
	MOVQ BP, AX

	// Skip the 1+4 bytes already known to match, then count further equal
	// bytes between (R9) = src+CX and (BP) = the same position 16(SP)
	// earlier. R8 = bytes left in src, R11 = count. Eight bytes are XORed at
	// a time; on the first difference BSFQ>>3 is the number of equal bytes.
emit_literal_done_repeat_emit_encodeBlockAsm8B:
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(BP*1), BP
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend

matchlen_loopback_repeat_extend:
	MOVQ (R9)(R11*1), R10
	XORQ (BP)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeBlockAsm8B

matchlen_loop_repeat_extend:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend

matchlen_single_repeat_extend:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeBlockAsm8B

matchlen_single_loopback_repeat_extend:
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE repeat_extend_forward_end_encodeBlockAsm8B
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend

	// BP = match length (CX - match start), SI = distance. DI still holds
	// the value 12(SP) had before the literals were emitted; when that was 0
	// the match is written with an explicit offset (repeat_as_copy) instead
	// of the offset-less encodings below.
repeat_extend_forward_end_encodeBlockAsm8B:
	ADDL R11, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	TESTL DI, DI
	JZ repeat_as_copy_encodeBlockAsm8B

	// Encode length BP: 2 bytes when BP <= 8, 2 bytes carrying the offset
	// when BP < 12 and SI < 2048, else 3, 4 or 5 bytes. Lengths too large
	// for the 5-byte form emit the maximum and loop with the remainder.
emit_repeat_again_match_repeat_encodeBlockAsm8B:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm8B
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_repeat_encodeBlockAsm8B

cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
	CMPL BP, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm8B
	CMPL BP, $0x00010100
	JLT repeat_four_match_repeat_encodeBlockAsm8B
	CMPL BP, $0x0100ffff
	JLT repeat_five_match_repeat_encodeBlockAsm8B
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_repeat_encodeBlockAsm8B

repeat_five_match_repeat_encodeBlockAsm8B:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_four_match_repeat_encodeBlockAsm8B:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_three_match_repeat_encodeBlockAsm8B:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_match_repeat_encodeBlockAsm8B:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_offset_match_repeat_encodeBlockAsm8B:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

	// Write the match with its offset SI and length BP. Offsets of 65536 or
	// more use a 5-byte form with a 4-byte offset; smaller ones use the
	// 2- or 3-byte forms further down. Lengths above 64 emit one maximal
	// copy first and encode the rest with the offset-less forms.
repeat_as_copy_encodeBlockAsm8B:
	CMPL SI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeBlockAsm8B

four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8B:
	CMPL BP, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm8B
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeBlockAsm8B

emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy
	CMPL BP, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy
	CMPL BP, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B
	// Not reachable: the preceding instruction is an unconditional JMP.
	JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8B

	// Tag byte = (BP-1)<<2 | 3; only the low byte of the LEAL result is
	// stored, so the upper bits of BX do not matter.
four_bytes_remain_repeat_as_copy_encodeBlockAsm8B:
	TESTL BP, BP
	JZ repeat_end_emit_encodeBlockAsm8B
	MOVB $0x03, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm8B

two_byte_offset_repeat_as_copy_encodeBlockAsm8B:
	CMPL BP, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX

emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	CMPL BP, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	CMPL BP, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B
	// Not reachable: the preceding instruction is an unconditional JMP.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B

	// Length <= 64: 2-byte form when BP < 12 and SI < 2048 (tag =
	// (BP-4)<<2 | 1 | (SI>>8)<<5, then the low offset byte), else the
	// 3-byte form (tag = (BP-1)<<2 | 2, then the 16-bit offset).
two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
	CMPL BP, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
	CMPL SI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

	// Everything up to CX is now written; continue unless CX reached 8(SP).
repeat_end_emit_encodeBlockAsm8B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm8B
	JMP search_loop_encodeBlockAsm8B

	// Try the table candidates in turn: BP against the bytes at CX, DI
	// against CX+1, then the entry at index R9 against CX+2 (that entry is
	// replaced by CX+2 on both outcomes). On a miss resume at 20(SP).
no_repeat_found_encodeBlockAsm8B:
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBlockAsm8B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ candidate2_match_encodeBlockAsm8B
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ candidate3_match_encodeBlockAsm8B
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm8B

candidate3_match_encodeBlockAsm8B:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm8B

candidate2_match_encodeBlockAsm8B:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

	// CX = match position, BP = candidate position. Step both backwards
	// while the preceding bytes are equal, CX > 12(SP) and BP > 0.
candidate_match_encodeBlockAsm8B:
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeBlockAsm8B

match_extend_back_loop_encodeBlockAsm8B:
	CMPL CX, SI
	JLE match_extend_back_end_encodeBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeBlockAsm8B
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeBlockAsm8B
	JMP match_extend_back_loop_encodeBlockAsm8B

	// Return 0 if AX + pending literal length + 4 would reach the limit in
	// (SP).
match_extend_back_end_encodeBlockAsm8B:
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Emit src[12(SP):CX] as literals and set 12(SP) = CX. R8 = length,
	// SI = source pointer, DI = length - 1; tag layout as in the repeat
	// path above.
match_dst_size_check_encodeBlockAsm8B:
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ emit_literal_done_match_emit_encodeBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	MOVL R8, DI
	SUBL $0x01, DI
	JC emit_literal_done_match_emit_encodeBlockAsm8B
	CMPL DI, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm8B
	CMPL DI, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm8B
	CMPL DI, $0x00010000
	JLT three_bytes_match_emit_encodeBlockAsm8B
	CMPL DI, $0x01000000
	JLT four_bytes_match_emit_encodeBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_match_emit_encodeBlockAsm8B

four_bytes_match_emit_encodeBlockAsm8B:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_match_emit_encodeBlockAsm8B

three_bytes_match_emit_encodeBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_match_emit_encodeBlockAsm8B

two_bytes_match_emit_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	JMP memmove_match_emit_encodeBlockAsm8B

one_byte_match_emit_encodeBlockAsm8B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

	// Copy R8 bytes from (SI) to (AX); DI = AX + R8 is the dst pointer
	// after the copy. Same size dispatch as the copy in the repeat path.
memmove_match_emit_encodeBlockAsm8B:
	LEAQ (AX)(R8*1), DI
	NOP

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_tail:
	TESTQ R8, R8
	JEQ memmove_end_copy_match_emit_encodeBlockAsm8B
	CMPQ R8, $0x02
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2
	CMPQ R8, $0x04
	JB emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_5through7
	JE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_9through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
	CMPQ R8, $0x40
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
	CMPQ R8, $0x80
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_65through128
	CMPQ R8, $0x00000100
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_129through256
	JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_256through2048

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4:
	MOVL (SI), R9
	MOVL R9, (AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_5through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
	MOVQ (SI), R9
	MOVQ R9, (AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_9through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_65through128:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU -64(SI)(R8*1), X12
	MOVOU -48(SI)(R8*1), X13
	MOVOU -32(SI)(R8*1), X14
	MOVOU -16(SI)(R8*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(R8*1)
	MOVOU X13, -48(AX)(R8*1)
	MOVOU X14, -32(AX)(R8*1)
	MOVOU X15, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_129through256:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	MOVOU -128(SI)(R8*1), X8
	MOVOU -112(SI)(R8*1), X9
	MOVOU -96(SI)(R8*1), X10
	MOVOU -80(SI)(R8*1), X11
	MOVOU -64(SI)(R8*1), X12
	MOVOU -48(SI)(R8*1), X13
	MOVOU -32(SI)(R8*1), X14
	MOVOU -16(SI)(R8*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(R8*1)
	MOVOU X9, -112(AX)(R8*1)
	MOVOU X10, -96(AX)(R8*1)
	MOVOU X11, -80(AX)(R8*1)
	MOVOU X12, -64(AX)(R8*1)
	MOVOU X13, -48(AX)(R8*1)
	MOVOU X14, -32(AX)(R8*1)
	MOVOU X15, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_256through2048:
	LEAQ -256(R8), R8
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	MOVOU 128(SI), X8
	MOVOU 144(SI), X9
	MOVOU 160(SI), X10
	MOVOU 176(SI), X11
	MOVOU 192(SI), X12
	MOVOU 208(SI), X13
	MOVOU 224(SI), X14
	MOVOU 240(SI), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ R8, $0x00000100
	LEAQ 256(SI), SI
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_256through2048
	JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_tail

memmove_end_copy_match_emit_encodeBlockAsm8B:
	MOVQ DI, AX

emit_literal_done_match_emit_encodeBlockAsm8B:
	// Record the distance CX - BP in 16(SP), skip the 4 bytes already known
	// to match, then count further equal bytes between (DI) = src+CX and
	// (BP) = src+candidate. SI = bytes left in src, R9 = count.
match_nolit_loop_encodeBlockAsm8B:
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm8B

matchlen_loopback_match_nolit_encodeBlockAsm8B:
	MOVQ (DI)(R9*1), R8
	XORQ (BP)(R9*1), R8
	TESTQ R8, R8
	JZ matchlen_loop_match_nolit_encodeBlockAsm8B
	BSFQ R8, R8
	SARQ $0x03, R8
	LEAL (R9)(R8*1), R9
	JMP match_nolit_end_encodeBlockAsm8B

matchlen_loop_match_nolit_encodeBlockAsm8B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm8B

matchlen_single_match_nolit_encodeBlockAsm8B:
	TESTL SI, SI
	JZ match_nolit_end_encodeBlockAsm8B

matchlen_single_loopback_match_nolit_encodeBlockAsm8B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE match_nolit_end_encodeBlockAsm8B
	LEAL 1(R9), R9
	DECL SI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm8B

	// CX now points past the match. R9 = total match length (count + 4),
	// BP = distance. Encoded exactly like repeat_as_copy above, with R9 as
	// the length and BP as the offset.
match_nolit_end_encodeBlockAsm8B:
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL two_byte_offset_match_nolit_encodeBlockAsm8B

four_bytes_loop_back_match_nolit_encodeBlockAsm8B:
	CMPL R9, $0x40
	JLE four_bytes_remain_match_nolit_encodeBlockAsm8B
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL four_bytes_remain_match_nolit_encodeBlockAsm8B

emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy
	CMPL R9, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm8B_emit_copy
	CMPL R9, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm8B_emit_copy
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy

repeat_five_match_nolit_encodeBlockAsm8B_emit_copy:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_four_match_nolit_encodeBlockAsm8B_emit_copy:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_three_match_nolit_encodeBlockAsm8B_emit_copy:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_match_nolit_encodeBlockAsm8B_emit_copy:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B
	// Not reachable: the preceding instruction is an unconditional JMP.
	JMP four_bytes_loop_back_match_nolit_encodeBlockAsm8B

four_bytes_remain_match_nolit_encodeBlockAsm8B:
	TESTL R9, R9
	JZ match_nolit_emitcopy_end_encodeBlockAsm8B
	MOVB $0x03, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

two_byte_offset_match_nolit_encodeBlockAsm8B:
	CMPL R9, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm8B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX

emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy_short:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
	CMPL R9, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm8B_emit_copy_short
	CMPL R9, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm8B_emit_copy_short
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy_short

repeat_five_match_nolit_encodeBlockAsm8B_emit_copy_short:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_four_match_nolit_encodeBlockAsm8B_emit_copy_short:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B
	// Not reachable: the preceding instruction is an unconditional JMP.
	JMP two_byte_offset_match_nolit_encodeBlockAsm8B

two_byte_offset_short_match_nolit_encodeBlockAsm8B:
	CMPL R9, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm8B
	CMPL BP, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

emit_copy_three_match_nolit_encodeBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

	// Everything up to CX is written. Stop searching once CX reached 8(SP);
	// return 0 if AX reached the dst limit in (SP).
match_nolit_emitcopy_end_encodeBlockAsm8B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm8B
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Load 8 bytes at CX-2 and index the table for positions CX-2 (DI) and
	// CX (R8). Store both positions; if the previous entry for CX points at
	// the same 4 bytes, go straight to the next match without literals.
match_nolit_dst_ok_encodeBlockAsm8B:
	MOVQ -2(DX)(CX*1), SI
	MOVQ $0x9e3779b1, BP
	MOVQ SI, DI
	SHRQ $0x10, SI
	MOVQ SI, R8
	SHLQ $0x20, DI
	IMULQ BP, DI
	SHRQ $0x38, DI
	SHLQ $0x20, R8
	IMULQ BP, R8
	SHRQ $0x38, R8
	LEAL -2(CX), R9
	MOVL 24(SP)(R8*4), BP
	MOVL R9, 24(SP)(DI*4)
	MOVL CX, 24(SP)(R8*4)
	CMPL (DX)(BP*1), SI
	JEQ match_nolit_loop_encodeBlockAsm8B
	INCL CX
	JMP search_loop_encodeBlockAsm8B

	// Return 0 if AX + remaining length + 4 would reach the limit in (SP);
	// otherwise emit src[12(SP):src_len] as one final literal run.
emit_remainder_encodeBlockAsm8B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// BP = length, CX = source pointer, DX = length - 1 (DX no longer holds
	// the src base from here on). Tag layout as in the literal paths above.
emit_remainder_ok_encodeBlockAsm8B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	MOVL BP, DX
	SUBL $0x01, DX
	JC emit_literal_done_emit_remainder_encodeBlockAsm8B
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm8B
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBlockAsm8B
	CMPL DX, $0x01000000
	JLT four_bytes_emit_remainder_encodeBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_emit_remainder_encodeBlockAsm8B

four_bytes_emit_remainder_encodeBlockAsm8B:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_emit_remainder_encodeBlockAsm8B

three_bytes_emit_remainder_encodeBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_emit_remainder_encodeBlockAsm8B

two_bytes_emit_remainder_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	JMP memmove_emit_remainder_encodeBlockAsm8B

one_byte_emit_remainder_encodeBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

	// Copy BX bytes from (CX) to (AX); DX = AX + length is the dst pointer
	// after the copy. Same size dispatch as the other two copies.
memmove_emit_remainder_encodeBlockAsm8B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	NOP

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_tail:
	TESTQ BX, BX
	JEQ memmove_end_copy_emit_remainder_encodeBlockAsm8B
	CMPQ BX, $0x02
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
	CMPQ BX, $0x04
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_5through7
	JE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_9through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
	CMPQ BX, $0x40
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
	CMPQ BX, $0x80
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_65through128
	CMPQ BX, $0x00000100
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_129through256
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_256through2048

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4:
	MOVL (CX), BP
	MOVL BP, (AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_5through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8:
	MOVQ (CX), BP
	MOVQ BP, (AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_9through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_65through128:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(BX*1), X12
	MOVOU -48(CX)(BX*1), X13
	MOVOU -32(CX)(BX*1), X14
	MOVOU -16(CX)(BX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(BX*1)
	MOVOU X13, -48(AX)(BX*1)
	MOVOU X14, -32(AX)(BX*1)
	MOVOU X15, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_129through256:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(BX*1), X8
	MOVOU -112(CX)(BX*1), X9
	MOVOU -96(CX)(BX*1), X10
	MOVOU -80(CX)(BX*1), X11
	MOVOU -64(CX)(BX*1), X12
	MOVOU -48(CX)(BX*1), X13
	MOVOU -32(CX)(BX*1), X14
	MOVOU -16(CX)(BX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(BX*1)
	MOVOU X9, -112(AX)(BX*1)
	MOVOU X10, -96(AX)(BX*1)
	MOVOU X11, -80(AX)(BX*1)
	MOVOU X12, -64(AX)(BX*1)
	MOVOU X13, -48(AX)(BX*1)
	MOVOU X14, -32(AX)(BX*1)
	MOVOU X15, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_256through2048:
	LEAQ -256(BX), BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU 128(CX), X8
	MOVOU 144(CX), X9
	MOVOU 160(CX), X10
	MOVOU 176(CX), X11
	MOVOU 192(CX), X12
	MOVOU 208(CX), X13
	MOVOU 224(CX), X14
	MOVOU 240(CX), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ BX, $0x00000100
	LEAQ 256(CX), CX
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_256through2048
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_tail

memmove_end_copy_emit_remainder_encodeBlockAsm8B:
	MOVQ DX, AX

	// Return the number of bytes written: AX - dst_base.
emit_literal_done_emit_remainder_encodeBlockAsm8B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsmAvx(dst []byte, src []byte) int
// Requires: AVX, SSE2
TEXT ·encodeBlockAsmAvx(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsmAvx:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBlockAsmAvx MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsmAvx: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeBlockAsmAvx MOVL BP, 20(SP) MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 SHLQ $0x10, R10 IMULQ R8, R10 SHRQ $0x32, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeBlockAsmAvx LEAL 1(CX), SI MOVL 12(SP), DI MOVL SI, BP SUBL 16(SP), BP JZ repeat_extend_back_end_encodeBlockAsmAvx repeat_extend_back_loop_encodeBlockAsmAvx: CMPL SI, DI JLE repeat_extend_back_end_encodeBlockAsmAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeBlockAsmAvx LEAL -1(SI), SI DECL BP JNZ repeat_extend_back_loop_encodeBlockAsmAvx repeat_extend_back_end_encodeBlockAsmAvx: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsmAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 MOVL R8, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeBlockAsmAvx CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeBlockAsmAvx CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsmAvx CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsmAvx CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsmAvx MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeBlockAsmAvx 
four_bytes_repeat_emit_encodeBlockAsmAvx: MOVL BP, R10 SHRL $0x10, R10 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeBlockAsmAvx three_bytes_repeat_emit_encodeBlockAsmAvx: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeBlockAsmAvx two_bytes_repeat_emit_encodeBlockAsmAvx: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeBlockAsmAvx one_byte_repeat_emit_encodeBlockAsmAvx: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsmAvx: LEAQ (AX)(R8*1), BP NOP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_repeat_emit_encodeBlockAsmAvx CMPQ R8, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R11 MOVB R10, (AX) MOVB R11, -1(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4: MOVL (R9), R10 MOVL R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx 
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R11 MOVW R10, (AX) MOVB R11, 2(AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R11 MOVL R10, (AX) MOVL R11, -4(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8: MOVQ (R9), R10 MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R11 MOVQ R10, (AX) MOVQ R11, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU -128(R9)(R8*1), X8 MOVOU -112(R9)(R8*1), X9 MOVOU -96(R9)(R8*1), X10 MOVOU -80(R9)(R8*1), X11 MOVOU -64(R9)(R8*1), X12 
MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU 128(R9), X8 MOVOU 144(R9), X9 MOVOU 160(R9), X10 MOVOU 176(R9), X11 MOVOU 192(R9), X12 MOVOU 208(R9), X13 MOVOU 224(R9), X14 MOVOU 240(R9), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(R9), R9 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned: LEAQ (R9)(R8*1), R11 MOVQ AX, R13 MOVOU -128(R11), X5 MOVOU -112(R11), X6 MOVQ $0x00000080, R10 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R11), X7 MOVOU -80(R11), X8 MOVQ AX, R12 SUBQ R13, R12 MOVOU -64(R11), X9 MOVOU -48(R11), X10 SUBQ R12, R8 MOVOU -32(R11), X11 MOVOU -16(R11), X12 VMOVDQU (R9), Y4 ADDQ R12, R9 SUBQ R10, R8 emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (R9), Y0 VMOVDQU 32(R9), Y1 VMOVDQU 64(R9), Y2 VMOVDQU 96(R9), Y3 ADDQ R10, R9 VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R10, AX SUBQ R10, R8 JA 
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop ADDQ R10, R8 ADDQ AX, R8 VMOVDQU Y4, (R13) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_repeat_emit_encodeBlockAsmAvx: MOVQ BP, AX emit_literal_done_repeat_emit_encodeBlockAsmAvx: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(BP*1), BP XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R9)(R11*1), R10 XORQ (BP)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsmAvx matchlen_loop_repeat_extend: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL R8, R8 JZ repeat_extend_forward_end_encodeBlockAsmAvx matchlen_single_loopback_repeat_extend: MOVB (R9)(R11*1), R10 CMPB (BP)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsmAvx LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsmAvx: ADDL R11, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsmAvx emit_repeat_again_match_repeat_encodeBlockAsmAvx: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_match_repeat_encodeBlockAsmAvx CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsmAvx CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsmAvx cant_repeat_two_offset_match_repeat_encodeBlockAsmAvx: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsmAvx CMPL BP, $0x00010100 JLT repeat_four_match_repeat_encodeBlockAsmAvx CMPL BP, $0x0100ffff JLT repeat_five_match_repeat_encodeBlockAsmAvx LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP 
emit_repeat_again_match_repeat_encodeBlockAsmAvx repeat_five_match_repeat_encodeBlockAsmAvx: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_four_match_repeat_encodeBlockAsmAvx: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_three_match_repeat_encodeBlockAsmAvx: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_match_repeat_encodeBlockAsmAvx: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_offset_match_repeat_encodeBlockAsmAvx: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_as_copy_encodeBlockAsmAvx: CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsmAvx four_bytes_loop_back_repeat_as_copy_encodeBlockAsmAvx: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP 
emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsmAvx JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsmAvx four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx: TESTL BP, BP JZ repeat_end_emit_encodeBlockAsmAvx MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsmAvx two_byte_offset_repeat_as_copy_encodeBlockAsmAvx: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL BP, $0x00010100 JLT 
repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsmAvx JMP two_byte_offset_repeat_as_copy_encodeBlockAsmAvx two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsmAvx emit_copy_three_repeat_as_copy_encodeBlockAsmAvx: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsmAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsmAvx JMP search_loop_encodeBlockAsmAvx no_repeat_found_encodeBlockAsmAvx: CMPL (DX)(BP*1), SI JEQ 
candidate_match_encodeBlockAsmAvx SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsmAvx MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeBlockAsmAvx MOVL 20(SP), CX JMP search_loop_encodeBlockAsmAvx candidate3_match_encodeBlockAsmAvx: ADDL $0x02, CX JMP candidate_match_encodeBlockAsmAvx candidate2_match_encodeBlockAsmAvx: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeBlockAsmAvx: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBlockAsmAvx match_extend_back_loop_encodeBlockAsmAvx: CMPL CX, SI JLE match_extend_back_end_encodeBlockAsmAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBlockAsmAvx LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBlockAsmAvx JMP match_extend_back_loop_encodeBlockAsmAvx match_extend_back_end_encodeBlockAsmAvx: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsmAvx: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsmAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeBlockAsmAvx CMPL DI, $0x3c JLT one_byte_match_emit_encodeBlockAsmAvx CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsmAvx CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsmAvx CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsmAvx MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeBlockAsmAvx four_bytes_match_emit_encodeBlockAsmAvx: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeBlockAsmAvx three_bytes_match_emit_encodeBlockAsmAvx: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeBlockAsmAvx 
two_bytes_match_emit_encodeBlockAsmAvx: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeBlockAsmAvx one_byte_match_emit_encodeBlockAsmAvx: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsmAvx: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeBlockAsmAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsmAvx 
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU 
X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ AX, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_match_emit_encodeBlockAsmAvx: MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsmAvx: match_nolit_loop_encodeBlockAsmAvx: 
MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsmAvx matchlen_loopback_match_nolit_encodeBlockAsmAvx: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsmAvx BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsmAvx matchlen_loop_match_nolit_encodeBlockAsmAvx: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsmAvx matchlen_single_match_nolit_encodeBlockAsmAvx: TESTL SI, SI JZ match_nolit_end_encodeBlockAsmAvx matchlen_single_loopback_match_nolit_encodeBlockAsmAvx: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeBlockAsmAvx LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsmAvx match_nolit_end_encodeBlockAsmAvx: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsmAvx four_bytes_loop_back_match_nolit_encodeBlockAsmAvx: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsmAvx MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsmAvx emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy LEAL -16842747(R9), R9 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB 
$0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx JMP four_bytes_loop_back_match_nolit_encodeBlockAsmAvx four_bytes_remain_match_nolit_encodeBlockAsmAvx: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeBlockAsmAvx MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx two_byte_offset_match_nolit_encodeBlockAsmAvx: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsmAvx MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL R9, 
$0x00010100 JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short LEAL -16842747(R9), R9 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx JMP two_byte_offset_match_nolit_encodeBlockAsmAvx two_byte_offset_short_match_nolit_encodeBlockAsmAvx: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsmAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsmAvx MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsmAvx emit_copy_three_match_nolit_encodeBlockAsmAvx: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsmAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsmAvx CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsmAvx MOVQ $0x00000000, 
ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsmAvx: MOVQ -2(DX)(CX*1), SI MOVQ $0x0000cf1bbcdcbf9b, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x10, DI IMULQ BP, DI SHRQ $0x32, DI SHLQ $0x10, R8 IMULQ BP, R8 SHRQ $0x32, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeBlockAsmAvx INCL CX JMP search_loop_encodeBlockAsmAvx emit_remainder_encodeBlockAsmAvx: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsmAvx: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsmAvx MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsmAvx MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeBlockAsmAvx four_bytes_emit_remainder_encodeBlockAsmAvx: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeBlockAsmAvx three_bytes_emit_remainder_encodeBlockAsmAvx: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeBlockAsmAvx two_bytes_emit_remainder_encodeBlockAsmAvx: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeBlockAsmAvx one_byte_emit_remainder_encodeBlockAsmAvx: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsmAvx: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeBlockAsmAvx CMPQ BX, 
$0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), SI MOVB BP, (AX) MOVB SI, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3: MOVW (CX), BP MOVB 2(CX), SI MOVW BP, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), SI MOVL BP, (AX) MOVL SI, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), SI MOVQ BP, (AX) MOVQ SI, -8(AX)(BX*1) JMP 
memmove_end_copy_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 
80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned: LEAQ (CX)(BX*1), SI MOVQ AX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 MOVQ $0x00000080, BP ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ AX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (CX), Y4 ADDQ DI, CX SUBQ BP, BX emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (CX), Y0 VMOVDQU 32(CX), Y1 VMOVDQU 64(CX), Y2 VMOVDQU 96(CX), Y3 ADDQ BP, CX VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ BP, AX SUBQ BP, BX JA emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop ADDQ BP, BX ADDQ AX, BX VMOVDQU Y4, (R8) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) memmove_end_copy_emit_remainder_encodeBlockAsmAvx: MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsmAvx: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm12BAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeBlockAsm12BAvx(SB), $16408-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000080, CX LEAQ 24(SP), DX PXOR X0, X0 
zero_loop_encodeBlockAsm12BAvx: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBlockAsm12BAvx MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm12BAvx: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeBlockAsm12BAvx MOVL BP, 20(SP) MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 SHLQ $0x18, R10 IMULQ R8, R10 SHRQ $0x34, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm12BAvx LEAL 1(CX), SI MOVL 12(SP), DI MOVL SI, BP SUBL 16(SP), BP JZ repeat_extend_back_end_encodeBlockAsm12BAvx repeat_extend_back_loop_encodeBlockAsm12BAvx: CMPL SI, DI JLE repeat_extend_back_end_encodeBlockAsm12BAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeBlockAsm12BAvx LEAL -1(SI), SI DECL BP JNZ repeat_extend_back_loop_encodeBlockAsm12BAvx repeat_extend_back_end_encodeBlockAsm12BAvx: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12BAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 MOVL R8, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeBlockAsm12BAvx CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm12BAvx CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm12BAvx CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm12BAvx CMPL BP, $0x01000000 JLT 
four_bytes_repeat_emit_encodeBlockAsm12BAvx MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeBlockAsm12BAvx four_bytes_repeat_emit_encodeBlockAsm12BAvx: MOVL BP, R10 SHRL $0x10, R10 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeBlockAsm12BAvx three_bytes_repeat_emit_encodeBlockAsm12BAvx: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeBlockAsm12BAvx two_bytes_repeat_emit_encodeBlockAsm12BAvx: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeBlockAsm12BAvx one_byte_repeat_emit_encodeBlockAsm12BAvx: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm12BAvx: LEAQ (AX)(R8*1), BP NOP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R11 MOVB R10, (AX) MOVB R11, -1(AX)(R8*1) JMP 
memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4: MOVL (R9), R10 MOVL R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R11 MOVW R10, (AX) MOVB R11, 2(AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R11 MOVL R10, (AX) MOVL R11, -4(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8: MOVQ (R9), R10 MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R11 MOVQ R10, (AX) MOVQ R11, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256: MOVOU (R9), X0 
MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU -128(R9)(R8*1), X8 MOVOU -112(R9)(R8*1), X9 MOVOU -96(R9)(R8*1), X10 MOVOU -80(R9)(R8*1), X11 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU 128(R9), X8 MOVOU 144(R9), X9 MOVOU 160(R9), X10 MOVOU 176(R9), X11 MOVOU 192(R9), X12 MOVOU 208(R9), X13 MOVOU 224(R9), X14 MOVOU 240(R9), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(R9), R9 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (R9)(R8*1), R11 MOVQ AX, R13 MOVOU -128(R11), X5 MOVOU -112(R11), X6 MOVQ $0x00000080, R10 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R11), X7 MOVOU -80(R11), X8 MOVQ AX, R12 SUBQ R13, R12 MOVOU -64(R11), X9 MOVOU -48(R11), X10 SUBQ R12, R8 MOVOU -32(R11), X11 MOVOU -16(R11), X12 VMOVDQU (R9), Y4 ADDQ R12, R9 SUBQ R10, R8 
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (R9), Y0 VMOVDQU 32(R9), Y1 VMOVDQU 64(R9), Y2 VMOVDQU 96(R9), Y3 ADDQ R10, R9 VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R10, AX SUBQ R10, R8 JA emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop ADDQ R10, R8 ADDQ AX, R8 VMOVDQU Y4, (R13) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_repeat_emit_encodeBlockAsm12BAvx: MOVQ BP, AX emit_literal_done_repeat_emit_encodeBlockAsm12BAvx: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(BP*1), BP XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R9)(R11*1), R10 XORQ (BP)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm12BAvx matchlen_loop_repeat_extend: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL R8, R8 JZ repeat_extend_forward_end_encodeBlockAsm12BAvx matchlen_single_loopback_repeat_extend: MOVB (R9)(R11*1), R10 CMPB (BP)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm12BAvx LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm12BAvx: ADDL R11, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm12BAvx emit_repeat_again_match_repeat_encodeBlockAsm12BAvx: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm12BAvx CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12BAvx CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm12BAvx cant_repeat_two_offset_match_repeat_encodeBlockAsm12BAvx: CMPL BP, 
$0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm12BAvx CMPL BP, $0x00010100 JLT repeat_four_match_repeat_encodeBlockAsm12BAvx CMPL BP, $0x0100ffff JLT repeat_five_match_repeat_encodeBlockAsm12BAvx LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_repeat_encodeBlockAsm12BAvx repeat_five_match_repeat_encodeBlockAsm12BAvx: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_four_match_repeat_encodeBlockAsm12BAvx: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_three_match_repeat_encodeBlockAsm12BAvx: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_match_repeat_encodeBlockAsm12BAvx: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_offset_match_repeat_encodeBlockAsm12BAvx: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_as_copy_encodeBlockAsm12BAvx: CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12BAvx: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy 
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12BAvx JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12BAvx four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx: TESTL BP, BP JZ repeat_end_emit_encodeBlockAsm12BAvx MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm12BAvx two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: MOVL BP, DI LEAL -4(BP), BP CMPL 
DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12BAvx JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx 
MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12BAvx emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm12BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm12BAvx JMP search_loop_encodeBlockAsm12BAvx no_repeat_found_encodeBlockAsm12BAvx: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBlockAsm12BAvx SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm12BAvx MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeBlockAsm12BAvx MOVL 20(SP), CX JMP search_loop_encodeBlockAsm12BAvx candidate3_match_encodeBlockAsm12BAvx: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm12BAvx candidate2_match_encodeBlockAsm12BAvx: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeBlockAsm12BAvx: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBlockAsm12BAvx match_extend_back_loop_encodeBlockAsm12BAvx: CMPL CX, SI JLE match_extend_back_end_encodeBlockAsm12BAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBlockAsm12BAvx LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBlockAsm12BAvx JMP match_extend_back_loop_encodeBlockAsm12BAvx match_extend_back_end_encodeBlockAsm12BAvx: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm12BAvx: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm12BAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeBlockAsm12BAvx CMPL DI, $0x3c JLT one_byte_match_emit_encodeBlockAsm12BAvx CMPL DI, $0x00000100 JLT 
two_bytes_match_emit_encodeBlockAsm12BAvx CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm12BAvx CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm12BAvx MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeBlockAsm12BAvx four_bytes_match_emit_encodeBlockAsm12BAvx: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeBlockAsm12BAvx three_bytes_match_emit_encodeBlockAsm12BAvx: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeBlockAsm12BAvx two_bytes_match_emit_encodeBlockAsm12BAvx: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeBlockAsm12BAvx one_byte_match_emit_encodeBlockAsm12BAvx: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm12BAvx: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeBlockAsm12BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned 
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP 
memmove_end_copy_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), 
X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ AX, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_match_emit_encodeBlockAsm12BAvx: MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm12BAvx: match_nolit_loop_encodeBlockAsm12BAvx: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm12BAvx matchlen_loopback_match_nolit_encodeBlockAsm12BAvx: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm12BAvx BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm12BAvx matchlen_loop_match_nolit_encodeBlockAsm12BAvx: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm12BAvx matchlen_single_match_nolit_encodeBlockAsm12BAvx: TESTL SI, SI JZ match_nolit_end_encodeBlockAsm12BAvx matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm12BAvx LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx match_nolit_end_encodeBlockAsm12BAvx: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm12BAvx four_bytes_loop_back_match_nolit_encodeBlockAsm12BAvx: CMPL R9, $0x40 JLE 
four_bytes_remain_match_nolit_encodeBlockAsm12BAvx MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm12BAvx emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy LEAL -16842747(R9), R9 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx JMP four_bytes_loop_back_match_nolit_encodeBlockAsm12BAvx 
four_bytes_remain_match_nolit_encodeBlockAsm12BAvx: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeBlockAsm12BAvx MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx two_byte_offset_match_nolit_encodeBlockAsm12BAvx: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short LEAL -16842747(R9), R9 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP 
match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx JMP two_byte_offset_match_nolit_encodeBlockAsm12BAvx two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx emit_copy_three_match_nolit_encodeBlockAsm12BAvx: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm12BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm12BAvx CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm12BAvx: MOVQ -2(DX)(CX*1), SI MOVQ $0x000000cf1bbcdcbb, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x18, DI IMULQ BP, DI SHRQ $0x34, DI SHLQ $0x18, R8 IMULQ BP, R8 SHRQ $0x34, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeBlockAsm12BAvx INCL CX JMP search_loop_encodeBlockAsm12BAvx emit_remainder_encodeBlockAsm12BAvx: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm12BAvx: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12BAvx MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm12BAvx 
CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm12BAvx MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeBlockAsm12BAvx four_bytes_emit_remainder_encodeBlockAsm12BAvx: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeBlockAsm12BAvx three_bytes_emit_remainder_encodeBlockAsm12BAvx: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeBlockAsm12BAvx two_bytes_emit_remainder_encodeBlockAsm12BAvx: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeBlockAsm12BAvx one_byte_emit_remainder_encodeBlockAsm12BAvx: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm12BAvx: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256 JMP 
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), SI MOVB BP, (AX) MOVB SI, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3: MOVW (CX), BP MOVB 2(CX), SI MOVW BP, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), SI MOVL BP, (AX) MOVL SI, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), SI MOVQ BP, (AX) MOVQ SI, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 
48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (CX)(BX*1), SI MOVQ AX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 
MOVQ $0x00000080, BP ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ AX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (CX), Y4 ADDQ DI, CX SUBQ BP, BX emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (CX), Y0 VMOVDQU 32(CX), Y1 VMOVDQU 64(CX), Y2 VMOVDQU 96(CX), Y3 ADDQ BP, CX VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ BP, AX SUBQ BP, BX JA emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop ADDQ BP, BX ADDQ AX, BX VMOVDQU Y4, (R8) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) memmove_end_copy_emit_remainder_encodeBlockAsm12BAvx: MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm12BAvx: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm10BAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeBlockAsm10BAvx(SB), $4120-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000020, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBlockAsm10BAvx: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBlockAsm10BAvx MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm10BAvx: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeBlockAsm10BAvx MOVL BP, 20(SP) MOVQ $0x9e3779b1, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x36, R9 SHLQ $0x20, R10 IMULQ R8, R10 SHRQ $0x36, R10 MOVL 24(SP)(R9*4), BP MOVL 
24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x36, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm10BAvx LEAL 1(CX), SI MOVL 12(SP), DI MOVL SI, BP SUBL 16(SP), BP JZ repeat_extend_back_end_encodeBlockAsm10BAvx repeat_extend_back_loop_encodeBlockAsm10BAvx: CMPL SI, DI JLE repeat_extend_back_end_encodeBlockAsm10BAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeBlockAsm10BAvx LEAL -1(SI), SI DECL BP JNZ repeat_extend_back_loop_encodeBlockAsm10BAvx repeat_extend_back_end_encodeBlockAsm10BAvx: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10BAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 MOVL R8, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeBlockAsm10BAvx CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm10BAvx CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm10BAvx CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm10BAvx CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm10BAvx MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeBlockAsm10BAvx four_bytes_repeat_emit_encodeBlockAsm10BAvx: MOVL BP, R10 SHRL $0x10, R10 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeBlockAsm10BAvx three_bytes_repeat_emit_encodeBlockAsm10BAvx: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeBlockAsm10BAvx two_bytes_repeat_emit_encodeBlockAsm10BAvx: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeBlockAsm10BAvx one_byte_repeat_emit_encodeBlockAsm10BAvx: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm10BAvx: LEAQ (AX)(R8*1), BP NOP emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_tail: TESTQ R8, R8 JEQ 
memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R11 MOVB R10, (AX) MOVB R11, -1(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_4: MOVL (R9), R10 MOVL R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R11 MOVW R10, (AX) MOVB R11, 2(AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_5through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R11 MOVL R10, (AX) MOVL R11, -4(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_8: MOVQ (R9), R10 MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_9through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R11 
MOVQ R10, (AX) MOVQ R11, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_65through128: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_129through256: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU -128(R9)(R8*1), X8 MOVOU -112(R9)(R8*1), X9 MOVOU -96(R9)(R8*1), X10 MOVOU -80(R9)(R8*1), X11 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), 
X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU 128(R9), X8 MOVOU 144(R9), X9 MOVOU 160(R9), X10 MOVOU 176(R9), X11 MOVOU 192(R9), X12 MOVOU 208(R9), X13 MOVOU 224(R9), X14 MOVOU 240(R9), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(R9), R9 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_avxUnaligned: LEAQ (R9)(R8*1), R11 MOVQ AX, R13 MOVOU -128(R11), X5 MOVOU -112(R11), X6 MOVQ $0x00000080, R10 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R11), X7 MOVOU -80(R11), X8 MOVQ AX, R12 SUBQ R13, R12 MOVOU -64(R11), X9 MOVOU -48(R11), X10 SUBQ R12, R8 MOVOU -32(R11), X11 MOVOU -16(R11), X12 VMOVDQU (R9), Y4 ADDQ R12, R9 SUBQ R10, R8 emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_gobble_128_loop: VMOVDQU (R9), Y0 VMOVDQU 32(R9), Y1 VMOVDQU 64(R9), Y2 VMOVDQU 96(R9), Y3 ADDQ R10, R9 VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R10, AX SUBQ R10, R8 JA emit_lit_memmove_repeat_emit_encodeBlockAsm10BAvx_memmove_gobble_128_loop ADDQ R10, R8 ADDQ AX, R8 VMOVDQU Y4, (R13) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_repeat_emit_encodeBlockAsm10BAvx: MOVQ BP, AX emit_literal_done_repeat_emit_encodeBlockAsm10BAvx: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(BP*1), BP XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend 
matchlen_loopback_repeat_extend: MOVQ (R9)(R11*1), R10 XORQ (BP)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm10BAvx matchlen_loop_repeat_extend: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL R8, R8 JZ repeat_extend_forward_end_encodeBlockAsm10BAvx matchlen_single_loopback_repeat_extend: MOVB (R9)(R11*1), R10 CMPB (BP)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm10BAvx LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm10BAvx: ADDL R11, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm10BAvx emit_repeat_again_match_repeat_encodeBlockAsm10BAvx: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm10BAvx CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10BAvx CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm10BAvx cant_repeat_two_offset_match_repeat_encodeBlockAsm10BAvx: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm10BAvx CMPL BP, $0x00010100 JLT repeat_four_match_repeat_encodeBlockAsm10BAvx CMPL BP, $0x0100ffff JLT repeat_five_match_repeat_encodeBlockAsm10BAvx LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_repeat_encodeBlockAsm10BAvx repeat_five_match_repeat_encodeBlockAsm10BAvx: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_four_match_repeat_encodeBlockAsm10BAvx: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_three_match_repeat_encodeBlockAsm10BAvx: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP 
repeat_end_emit_encodeBlockAsm10BAvx repeat_two_match_repeat_encodeBlockAsm10BAvx: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_two_offset_match_repeat_encodeBlockAsm10BAvx: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_as_copy_encodeBlockAsm10BAvx: CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm10BAvx four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10BAvx: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm10BAvx MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm10BAvx emit_repeat_again_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_four_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10BAvx 
repeat_three_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_two_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10BAvx JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10BAvx four_bytes_remain_repeat_as_copy_encodeBlockAsm10BAvx: TESTL BP, BP JZ repeat_end_emit_encodeBlockAsm10BAvx MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm10BAvx two_byte_offset_repeat_as_copy_encodeBlockAsm10BAvx: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10BAvx MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX emit_repeat_again_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short: LEAL -65536(BP), BP 
MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_four_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_three_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_two_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm10BAvx_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10BAvx JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10BAvx two_byte_offset_short_repeat_as_copy_encodeBlockAsm10BAvx: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10BAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10BAvx MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10BAvx emit_copy_three_repeat_as_copy_encodeBlockAsm10BAvx: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm10BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm10BAvx JMP search_loop_encodeBlockAsm10BAvx no_repeat_found_encodeBlockAsm10BAvx: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBlockAsm10BAvx SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm10BAvx MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeBlockAsm10BAvx MOVL 20(SP), CX JMP search_loop_encodeBlockAsm10BAvx candidate3_match_encodeBlockAsm10BAvx: ADDL $0x02, CX JMP 
candidate_match_encodeBlockAsm10BAvx candidate2_match_encodeBlockAsm10BAvx: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeBlockAsm10BAvx: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBlockAsm10BAvx match_extend_back_loop_encodeBlockAsm10BAvx: CMPL CX, SI JLE match_extend_back_end_encodeBlockAsm10BAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBlockAsm10BAvx LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBlockAsm10BAvx JMP match_extend_back_loop_encodeBlockAsm10BAvx match_extend_back_end_encodeBlockAsm10BAvx: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm10BAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm10BAvx: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm10BAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeBlockAsm10BAvx CMPL DI, $0x3c JLT one_byte_match_emit_encodeBlockAsm10BAvx CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm10BAvx CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm10BAvx CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm10BAvx MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeBlockAsm10BAvx four_bytes_match_emit_encodeBlockAsm10BAvx: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeBlockAsm10BAvx three_bytes_match_emit_encodeBlockAsm10BAvx: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeBlockAsm10BAvx two_bytes_match_emit_encodeBlockAsm10BAvx: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeBlockAsm10BAvx one_byte_match_emit_encodeBlockAsm10BAvx: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm10BAvx: LEAQ (AX)(R8*1), DI NOP 
emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeBlockAsm10BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx 
emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10BAvx 
emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_tail emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeBlockAsm10BAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ AX, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_match_emit_encodeBlockAsm10BAvx: MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm10BAvx: match_nolit_loop_encodeBlockAsm10BAvx: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) 
ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm10BAvx matchlen_loopback_match_nolit_encodeBlockAsm10BAvx: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm10BAvx BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm10BAvx matchlen_loop_match_nolit_encodeBlockAsm10BAvx: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm10BAvx matchlen_single_match_nolit_encodeBlockAsm10BAvx: TESTL SI, SI JZ match_nolit_end_encodeBlockAsm10BAvx matchlen_single_loopback_match_nolit_encodeBlockAsm10BAvx: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm10BAvx LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm10BAvx match_nolit_end_encodeBlockAsm10BAvx: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm10BAvx four_bytes_loop_back_match_nolit_encodeBlockAsm10BAvx: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm10BAvx MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm10BAvx emit_repeat_again_match_nolit_encodeBlockAsm10BAvx_emit_copy: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm10BAvx_emit_copy CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm10BAvx_emit_copy CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm10BAvx_emit_copy CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm10BAvx_emit_copy LEAL -16842747(R9), R9 MOVW $0x001d, 
(AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm10BAvx_emit_copy repeat_five_match_nolit_encodeBlockAsm10BAvx_emit_copy: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx repeat_four_match_nolit_encodeBlockAsm10BAvx_emit_copy: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx repeat_three_match_nolit_encodeBlockAsm10BAvx_emit_copy: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx repeat_two_match_nolit_encodeBlockAsm10BAvx_emit_copy: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx JMP four_bytes_loop_back_match_nolit_encodeBlockAsm10BAvx four_bytes_remain_match_nolit_encodeBlockAsm10BAvx: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeBlockAsm10BAvx MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx two_byte_offset_match_nolit_encodeBlockAsm10BAvx: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm10BAvx MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX emit_repeat_again_match_nolit_encodeBlockAsm10BAvx_emit_copy_short: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm10BAvx_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy_short: 
CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm10BAvx_emit_copy_short CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm10BAvx_emit_copy_short CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm10BAvx_emit_copy_short LEAL -16842747(R9), R9 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm10BAvx_emit_copy_short repeat_five_match_nolit_encodeBlockAsm10BAvx_emit_copy_short: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx repeat_four_match_nolit_encodeBlockAsm10BAvx_emit_copy_short: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx repeat_three_match_nolit_encodeBlockAsm10BAvx_emit_copy_short: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx repeat_two_match_nolit_encodeBlockAsm10BAvx_emit_copy_short: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx repeat_two_offset_match_nolit_encodeBlockAsm10BAvx_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx JMP two_byte_offset_match_nolit_encodeBlockAsm10BAvx two_byte_offset_short_match_nolit_encodeBlockAsm10BAvx: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm10BAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm10BAvx MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10BAvx emit_copy_three_match_nolit_encodeBlockAsm10BAvx: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX 
match_nolit_emitcopy_end_encodeBlockAsm10BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm10BAvx CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm10BAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm10BAvx: MOVQ -2(DX)(CX*1), SI MOVQ $0x9e3779b1, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x20, DI IMULQ BP, DI SHRQ $0x36, DI SHLQ $0x20, R8 IMULQ BP, R8 SHRQ $0x36, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeBlockAsm10BAvx INCL CX JMP search_loop_encodeBlockAsm10BAvx emit_remainder_encodeBlockAsm10BAvx: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm10BAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm10BAvx: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm10BAvx MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm10BAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm10BAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm10BAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm10BAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm10BAvx MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeBlockAsm10BAvx four_bytes_emit_remainder_encodeBlockAsm10BAvx: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeBlockAsm10BAvx three_bytes_emit_remainder_encodeBlockAsm10BAvx: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeBlockAsm10BAvx two_bytes_emit_remainder_encodeBlockAsm10BAvx: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeBlockAsm10BAvx one_byte_emit_remainder_encodeBlockAsm10BAvx: SHLB $0x02, DL MOVB 
DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm10BAvx: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), SI MOVB BP, (AX) MOVB SI, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_3: MOVW (CX), BP MOVB 2(CX), SI MOVW BP, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), SI MOVL BP, (AX) MOVL SI, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx 
emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), SI MOVQ BP, (AX) MOVQ SI, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU 
X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_avxUnaligned: LEAQ (CX)(BX*1), SI MOVQ AX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 MOVQ $0x00000080, BP ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ AX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (CX), Y4 ADDQ DI, CX SUBQ BP, BX emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_gobble_128_loop: VMOVDQU (CX), Y0 VMOVDQU 32(CX), Y1 VMOVDQU 64(CX), Y2 VMOVDQU 96(CX), Y3 ADDQ BP, CX VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ BP, AX SUBQ BP, BX JA emit_lit_memmove_emit_remainder_encodeBlockAsm10BAvx_memmove_gobble_128_loop ADDQ BP, BX ADDQ AX, BX VMOVDQU Y4, (R8) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) 
memmove_end_copy_emit_remainder_encodeBlockAsm10BAvx: MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm10BAvx: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm8BAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeBlockAsm8BAvx(SB), $1048-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000008, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBlockAsm8BAvx: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBlockAsm8BAvx MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm8BAvx: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x04, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeBlockAsm8BAvx MOVL BP, 20(SP) MOVQ $0x9e3779b1, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x38, R9 SHLQ $0x20, R10 IMULQ R8, R10 SHRQ $0x38, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x38, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm8BAvx LEAL 1(CX), SI MOVL 12(SP), DI MOVL SI, BP SUBL 16(SP), BP JZ repeat_extend_back_end_encodeBlockAsm8BAvx repeat_extend_back_loop_encodeBlockAsm8BAvx: CMPL SI, DI JLE repeat_extend_back_end_encodeBlockAsm8BAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeBlockAsm8BAvx LEAL -1(SI), SI DECL BP JNZ repeat_extend_back_loop_encodeBlockAsm8BAvx repeat_extend_back_end_encodeBlockAsm8BAvx: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm8BAvx MOVL SI, R8 MOVL 
SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 MOVL R8, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeBlockAsm8BAvx CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm8BAvx CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm8BAvx CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm8BAvx CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm8BAvx MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeBlockAsm8BAvx four_bytes_repeat_emit_encodeBlockAsm8BAvx: MOVL BP, R10 SHRL $0x10, R10 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeBlockAsm8BAvx three_bytes_repeat_emit_encodeBlockAsm8BAvx: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeBlockAsm8BAvx two_bytes_repeat_emit_encodeBlockAsm8BAvx: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeBlockAsm8BAvx one_byte_repeat_emit_encodeBlockAsm8BAvx: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm8BAvx: LEAQ (AX)(R8*1), BP NOP emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE 
emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R11 MOVB R10, (AX) MOVB R11, -1(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_4: MOVL (R9), R10 MOVL R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R11 MOVW R10, (AX) MOVB R11, 2(AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_5through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R11 MOVL R10, (AX) MOVL R11, -4(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_8: MOVQ (R9), R10 MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_9through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R11 MOVQ R10, (AX) MOVQ R11, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_65through128: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) 
MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_129through256: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU -128(R9)(R8*1), X8 MOVOU -112(R9)(R8*1), X9 MOVOU -96(R9)(R8*1), X10 MOVOU -80(R9)(R8*1), X11 MOVOU -64(R9)(R8*1), X12 MOVOU -48(R9)(R8*1), X13 MOVOU -32(R9)(R8*1), X14 MOVOU -16(R9)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU 32(R9), X2 MOVOU 48(R9), X3 MOVOU 64(R9), X4 MOVOU 80(R9), X5 MOVOU 96(R9), X6 MOVOU 112(R9), X7 MOVOU 128(R9), X8 MOVOU 144(R9), X9 MOVOU 160(R9), X10 MOVOU 176(R9), X11 MOVOU 192(R9), X12 MOVOU 208(R9), X13 MOVOU 224(R9), X14 MOVOU 240(R9), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(R9), R9 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_avxUnaligned: LEAQ (R9)(R8*1), R11 MOVQ AX, R13 MOVOU -128(R11), X5 MOVOU -112(R11), X6 
MOVQ $0x00000080, R10 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R11), X7 MOVOU -80(R11), X8 MOVQ AX, R12 SUBQ R13, R12 MOVOU -64(R11), X9 MOVOU -48(R11), X10 SUBQ R12, R8 MOVOU -32(R11), X11 MOVOU -16(R11), X12 VMOVDQU (R9), Y4 ADDQ R12, R9 SUBQ R10, R8 emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_gobble_128_loop: VMOVDQU (R9), Y0 VMOVDQU 32(R9), Y1 VMOVDQU 64(R9), Y2 VMOVDQU 96(R9), Y3 ADDQ R10, R9 VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R10, AX SUBQ R10, R8 JA emit_lit_memmove_repeat_emit_encodeBlockAsm8BAvx_memmove_gobble_128_loop ADDQ R10, R8 ADDQ AX, R8 VMOVDQU Y4, (R13) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_repeat_emit_encodeBlockAsm8BAvx: MOVQ BP, AX emit_literal_done_repeat_emit_encodeBlockAsm8BAvx: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(BP*1), BP XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R9)(R11*1), R10 XORQ (BP)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm8BAvx matchlen_loop_repeat_extend: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL R8, R8 JZ repeat_extend_forward_end_encodeBlockAsm8BAvx matchlen_single_loopback_repeat_extend: MOVB (R9)(R11*1), R10 CMPB (BP)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm8BAvx LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm8BAvx: ADDL R11, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm8BAvx emit_repeat_again_match_repeat_encodeBlockAsm8BAvx: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE 
repeat_two_match_repeat_encodeBlockAsm8BAvx CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8BAvx CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm8BAvx cant_repeat_two_offset_match_repeat_encodeBlockAsm8BAvx: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm8BAvx CMPL BP, $0x00010100 JLT repeat_four_match_repeat_encodeBlockAsm8BAvx CMPL BP, $0x0100ffff JLT repeat_five_match_repeat_encodeBlockAsm8BAvx LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_repeat_encodeBlockAsm8BAvx repeat_five_match_repeat_encodeBlockAsm8BAvx: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_four_match_repeat_encodeBlockAsm8BAvx: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_three_match_repeat_encodeBlockAsm8BAvx: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_two_match_repeat_encodeBlockAsm8BAvx: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_two_offset_match_repeat_encodeBlockAsm8BAvx: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_as_copy_encodeBlockAsm8BAvx: CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm8BAvx four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8BAvx: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm8BAvx MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm8BAvx emit_repeat_again_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE 
repeat_two_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_four_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_three_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_two_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8BAvx JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8BAvx four_bytes_remain_repeat_as_copy_encodeBlockAsm8BAvx: TESTL BP, BP JZ repeat_end_emit_encodeBlockAsm8BAvx MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm8BAvx two_byte_offset_repeat_as_copy_encodeBlockAsm8BAvx: CMPL BP, $0x40 JLE 
two_byte_offset_short_repeat_as_copy_encodeBlockAsm8BAvx MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX emit_repeat_again_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short CMPL BP, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short LEAL -16842747(BP), BP MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short: LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_four_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_three_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_two_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm8BAvx_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8BAvx JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8BAvx 
two_byte_offset_short_repeat_as_copy_encodeBlockAsm8BAvx: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8BAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8BAvx MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8BAvx emit_copy_three_repeat_as_copy_encodeBlockAsm8BAvx: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm8BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm8BAvx JMP search_loop_encodeBlockAsm8BAvx no_repeat_found_encodeBlockAsm8BAvx: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBlockAsm8BAvx SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm8BAvx MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeBlockAsm8BAvx MOVL 20(SP), CX JMP search_loop_encodeBlockAsm8BAvx candidate3_match_encodeBlockAsm8BAvx: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm8BAvx candidate2_match_encodeBlockAsm8BAvx: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeBlockAsm8BAvx: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBlockAsm8BAvx match_extend_back_loop_encodeBlockAsm8BAvx: CMPL CX, SI JLE match_extend_back_end_encodeBlockAsm8BAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBlockAsm8BAvx LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBlockAsm8BAvx JMP match_extend_back_loop_encodeBlockAsm8BAvx match_extend_back_end_encodeBlockAsm8BAvx: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm8BAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm8BAvx: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm8BAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI 
SUBL $0x01, DI JC emit_literal_done_match_emit_encodeBlockAsm8BAvx CMPL DI, $0x3c JLT one_byte_match_emit_encodeBlockAsm8BAvx CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm8BAvx CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm8BAvx CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm8BAvx MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeBlockAsm8BAvx four_bytes_match_emit_encodeBlockAsm8BAvx: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeBlockAsm8BAvx three_bytes_match_emit_encodeBlockAsm8BAvx: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeBlockAsm8BAvx two_bytes_match_emit_encodeBlockAsm8BAvx: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeBlockAsm8BAvx one_byte_match_emit_encodeBlockAsm8BAvx: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm8BAvx: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeBlockAsm8BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_129through256 JMP 
emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) 
MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8BAvx emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_tail emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ 
R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeBlockAsm8BAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ AX, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_match_emit_encodeBlockAsm8BAvx: MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm8BAvx: match_nolit_loop_encodeBlockAsm8BAvx: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm8BAvx matchlen_loopback_match_nolit_encodeBlockAsm8BAvx: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm8BAvx BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm8BAvx matchlen_loop_match_nolit_encodeBlockAsm8BAvx: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm8BAvx matchlen_single_match_nolit_encodeBlockAsm8BAvx: TESTL SI, SI JZ match_nolit_end_encodeBlockAsm8BAvx matchlen_single_loopback_match_nolit_encodeBlockAsm8BAvx: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm8BAvx LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm8BAvx match_nolit_end_encodeBlockAsm8BAvx: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm8BAvx four_bytes_loop_back_match_nolit_encodeBlockAsm8BAvx: CMPL R9, $0x40 JLE 
four_bytes_remain_match_nolit_encodeBlockAsm8BAvx MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm8BAvx emit_repeat_again_match_nolit_encodeBlockAsm8BAvx_emit_copy: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm8BAvx_emit_copy CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm8BAvx_emit_copy CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm8BAvx_emit_copy CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm8BAvx_emit_copy LEAL -16842747(R9), R9 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm8BAvx_emit_copy repeat_five_match_nolit_encodeBlockAsm8BAvx_emit_copy: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx repeat_four_match_nolit_encodeBlockAsm8BAvx_emit_copy: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx repeat_three_match_nolit_encodeBlockAsm8BAvx_emit_copy: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx repeat_two_match_nolit_encodeBlockAsm8BAvx_emit_copy: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx JMP four_bytes_loop_back_match_nolit_encodeBlockAsm8BAvx 
four_bytes_remain_match_nolit_encodeBlockAsm8BAvx: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeBlockAsm8BAvx MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx two_byte_offset_match_nolit_encodeBlockAsm8BAvx: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm8BAvx MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX emit_repeat_again_match_nolit_encodeBlockAsm8BAvx_emit_copy_short: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm8BAvx_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy_short: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm8BAvx_emit_copy_short CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm8BAvx_emit_copy_short CMPL R9, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm8BAvx_emit_copy_short LEAL -16842747(R9), R9 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm8BAvx_emit_copy_short repeat_five_match_nolit_encodeBlockAsm8BAvx_emit_copy_short: LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx repeat_four_match_nolit_encodeBlockAsm8BAvx_emit_copy_short: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx repeat_three_match_nolit_encodeBlockAsm8BAvx_emit_copy_short: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx repeat_two_match_nolit_encodeBlockAsm8BAvx_emit_copy_short: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP 
match_nolit_emitcopy_end_encodeBlockAsm8BAvx repeat_two_offset_match_nolit_encodeBlockAsm8BAvx_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx JMP two_byte_offset_match_nolit_encodeBlockAsm8BAvx two_byte_offset_short_match_nolit_encodeBlockAsm8BAvx: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm8BAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm8BAvx MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8BAvx emit_copy_three_match_nolit_encodeBlockAsm8BAvx: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm8BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm8BAvx CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm8BAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm8BAvx: MOVQ -2(DX)(CX*1), SI MOVQ $0x9e3779b1, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x20, DI IMULQ BP, DI SHRQ $0x38, DI SHLQ $0x20, R8 IMULQ BP, R8 SHRQ $0x38, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeBlockAsm8BAvx INCL CX JMP search_loop_encodeBlockAsm8BAvx emit_remainder_encodeBlockAsm8BAvx: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm8BAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm8BAvx: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm8BAvx MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm8BAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm8BAvx CMPL DX, $0x00000100 JLT 
two_bytes_emit_remainder_encodeBlockAsm8BAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm8BAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm8BAvx MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeBlockAsm8BAvx four_bytes_emit_remainder_encodeBlockAsm8BAvx: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeBlockAsm8BAvx three_bytes_emit_remainder_encodeBlockAsm8BAvx: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeBlockAsm8BAvx two_bytes_emit_remainder_encodeBlockAsm8BAvx: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeBlockAsm8BAvx one_byte_emit_remainder_encodeBlockAsm8BAvx: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm8BAvx: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_129through256 JMP 
emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), SI MOVB BP, (AX) MOVB SI, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_3: MOVW (CX), BP MOVB 2(CX), SI MOVW BP, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), SI MOVL BP, (AX) MOVL SI, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), SI MOVQ BP, (AX) MOVQ SI, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, 
-64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_avxUnaligned: LEAQ (CX)(BX*1), SI MOVQ AX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 MOVQ $0x00000080, BP ANDQ 
$0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ AX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (CX), Y4 ADDQ DI, CX SUBQ BP, BX emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_gobble_128_loop: VMOVDQU (CX), Y0 VMOVDQU 32(CX), Y1 VMOVDQU 64(CX), Y2 VMOVDQU 96(CX), Y3 ADDQ BP, CX VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ BP, AX SUBQ BP, BX JA emit_lit_memmove_emit_remainder_encodeBlockAsm8BAvx_memmove_gobble_128_loop ADDQ BP, BX ADDQ AX, BX VMOVDQU Y4, (R8) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) memmove_end_copy_emit_remainder_encodeBlockAsm8BAvx: MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm8BAvx: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeSnappyBlockAsm(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeSnappyBlockAsm(SB), $65560-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000200, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x06, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeSnappyBlockAsm MOVL BP, 20(SP) MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 SHLQ $0x10, R10 IMULQ R8, R10 SHRQ $0x32, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) 
LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm repeat_extend_back_loop_encodeSnappyBlockAsm: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI MOVL DI, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeSnappyBlockAsm CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeSnappyBlockAsm MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm four_bytes_repeat_emit_encodeSnappyBlockAsm: MOVL BP, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm three_bytes_repeat_emit_encodeSnappyBlockAsm: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm two_bytes_repeat_emit_encodeSnappyBlockAsm: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm one_byte_repeat_emit_encodeSnappyBlockAsm: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm: LEAQ (AX)(DI*1), BP NOP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_tail: TESTQ DI, DI JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm CMPQ 
DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_256through2048 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (AX) MOVB R8, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4: MOVL (R8), R9 MOVL R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (AX) MOVB R8, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (AX) MOVL R8, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R8 MOVQ R9, (AX) MOVQ R8, -8(AX)(DI*1) JMP 
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(DI*1) MOVOU X9, -112(AX)(DI*1) MOVOU X10, -96(AX)(DI*1) MOVOU X11, -80(AX)(DI*1) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 
80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ DI, $0x00000100 LEAQ 256(R8), R8 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_tail memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_loop_repeat_extend: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_single_loopback_repeat_extend: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeSnappyBlockAsm: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL 
four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: TESTL BP, BP JZ repeat_end_emit_encodeSnappyBlockAsm MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeSnappyBlockAsm two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm JMP search_loop_encodeSnappyBlockAsm no_repeat_found_encodeSnappyBlockAsm: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm candidate3_match_encodeSnappyBlockAsm: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm candidate2_match_encodeSnappyBlockAsm: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm match_extend_back_loop_encodeSnappyBlockAsm: CMPL CX, SI JLE 
match_extend_back_end_encodeSnappyBlockAsm MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm JMP match_extend_back_loop_encodeSnappyBlockAsm match_extend_back_end_encodeSnappyBlockAsm: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeSnappyBlockAsm CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeSnappyBlockAsm CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeSnappyBlockAsm MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeSnappyBlockAsm four_bytes_match_emit_encodeSnappyBlockAsm: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeSnappyBlockAsm three_bytes_match_emit_encodeSnappyBlockAsm: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeSnappyBlockAsm two_bytes_match_emit_encodeSnappyBlockAsm: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeSnappyBlockAsm one_byte_match_emit_encodeSnappyBlockAsm: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3 JBE 
emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_256through2048 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP 
memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU 
X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_tail memmove_end_copy_match_emit_encodeSnappyBlockAsm: MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm: match_nolit_loop_encodeSnappyBlockAsm: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm matchlen_loopback_match_nolit_encodeSnappyBlockAsm: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_loop_match_nolit_encodeSnappyBlockAsm: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm matchlen_single_match_nolit_encodeSnappyBlockAsm: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm match_nolit_end_encodeSnappyBlockAsm: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeSnappyBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm JMP 
four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm four_bytes_remain_match_nolit_encodeSnappyBlockAsm: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm two_byte_offset_match_nolit_encodeSnappyBlockAsm: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm emit_copy_three_match_nolit_encodeSnappyBlockAsm: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm: MOVQ -2(DX)(CX*1), SI MOVQ $0x0000cf1bbcdcbf9b, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x10, DI IMULQ BP, DI SHRQ $0x32, DI SHLQ $0x10, R8 IMULQ BP, R8 SHRQ $0x32, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm INCL CX JMP search_loop_encodeSnappyBlockAsm emit_remainder_encodeSnappyBlockAsm: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm MOVL 
CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeSnappyBlockAsm MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm four_bytes_emit_remainder_encodeSnappyBlockAsm: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm three_bytes_emit_remainder_encodeSnappyBlockAsm: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm two_bytes_emit_remainder_encodeSnappyBlockAsm: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm one_byte_emit_remainder_encodeSnappyBlockAsm: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 
CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_256through2048 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_65through128: MOVOU (CX), X0 
MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_256through2048 JMP 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_tail memmove_end_copy_emit_remainder_encodeSnappyBlockAsm: MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsm: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeSnappyBlockAsm12B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000080, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm12B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm12B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm12B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeSnappyBlockAsm12B MOVL BP, 20(SP) MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 SHLQ $0x18, R10 IMULQ R8, R10 SHRQ $0x34, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm12B LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm12B repeat_extend_back_loop_encodeSnappyBlockAsm12B: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm12B MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B 
repeat_extend_back_end_encodeSnappyBlockAsm12B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI MOVL DI, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeSnappyBlockAsm12B CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeSnappyBlockAsm12B MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm12B four_bytes_repeat_emit_encodeSnappyBlockAsm12B: MOVL BP, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm12B three_bytes_repeat_emit_encodeSnappyBlockAsm12B: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm12B two_bytes_repeat_emit_encodeSnappyBlockAsm12B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm12B one_byte_repeat_emit_encodeSnappyBlockAsm12B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm12B: LEAQ (AX)(DI*1), BP NOP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_tail: TESTQ DI, DI JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_9through16 CMPQ DI, $0x20 JBE 
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (AX) MOVB R8, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4: MOVL (R8), R9 MOVL R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (AX) MOVB R8, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (AX) MOVL R8, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R8 MOVQ R9, (AX) MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) 
MOVOU X3, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(DI*1) MOVOU X9, -112(AX)(DI*1) MOVOU X10, -96(AX)(DI*1) MOVOU X11, -80(AX)(DI*1) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU 
X14, 224(AX) MOVOU X15, 240(AX) CMPQ DI, $0x00000100 LEAQ 256(R8), R8 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_tail memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_loop_repeat_extend: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_single_loopback_repeat_extend: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeSnappyBlockAsm12B: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12B: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12B four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B: TESTL BP, BP JZ repeat_end_emit_encodeSnappyBlockAsm12B MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeSnappyBlockAsm12B two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: 
CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm12B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm12B: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm12B JMP search_loop_encodeSnappyBlockAsm12B no_repeat_found_encodeSnappyBlockAsm12B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm12B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm12B MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm12B candidate3_match_encodeSnappyBlockAsm12B: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm12B candidate2_match_encodeSnappyBlockAsm12B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm12B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm12B match_extend_back_loop_encodeSnappyBlockAsm12B: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm12B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm12B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm12B JMP match_extend_back_loop_encodeSnappyBlockAsm12B match_extend_back_end_encodeSnappyBlockAsm12B: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI 
CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm12B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeSnappyBlockAsm12B CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm12B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm12B CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeSnappyBlockAsm12B CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeSnappyBlockAsm12B MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeSnappyBlockAsm12B four_bytes_match_emit_encodeSnappyBlockAsm12B: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeSnappyBlockAsm12B three_bytes_match_emit_encodeSnappyBlockAsm12B: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeSnappyBlockAsm12B two_bytes_match_emit_encodeSnappyBlockAsm12B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeSnappyBlockAsm12B one_byte_match_emit_encodeSnappyBlockAsm12B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm12B: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm12B CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 CMPQ R8, $0x10 JBE 
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU 
-16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 
160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_tail memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm12B: match_nolit_loop_encodeSnappyBlockAsm12B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm12B matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_loop_match_nolit_encodeSnappyBlockAsm12B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B matchlen_single_match_nolit_encodeSnappyBlockAsm12B: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm12B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B match_nolit_end_encodeSnappyBlockAsm12B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeSnappyBlockAsm12B four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12B: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12B four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B: TESTL 
R9, R9 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm12B MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm12B CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12B: MOVQ -2(DX)(CX*1), SI MOVQ $0x000000cf1bbcdcbb, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x18, DI IMULQ BP, DI SHRQ $0x34, DI SHLQ $0x18, R8 IMULQ BP, R8 SHRQ $0x34, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm12B INCL CX JMP search_loop_encodeSnappyBlockAsm12B emit_remainder_encodeSnappyBlockAsm12B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm12B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, 
BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeSnappyBlockAsm12B CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeSnappyBlockAsm12B MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm12B four_bytes_emit_remainder_encodeSnappyBlockAsm12B: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm12B three_bytes_emit_remainder_encodeSnappyBlockAsm12B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm12B two_bytes_emit_remainder_encodeSnappyBlockAsm12B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm12B one_byte_emit_remainder_encodeSnappyBlockAsm12B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm12B: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32 CMPQ BX, $0x40 JBE 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP 
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) 
MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_tail memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B: MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeSnappyBlockAsm10B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000020, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm10B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm10B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm10B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeSnappyBlockAsm10B MOVL BP, 20(SP) MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x36, R9 SHLQ $0x18, R10 IMULQ R8, R10 SHRQ $0x36, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x36, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm10B LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm10B repeat_extend_back_loop_encodeSnappyBlockAsm10B: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm10B MOVB -1(DX)(DI*1), BL MOVB 
-1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B repeat_extend_back_end_encodeSnappyBlockAsm10B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI MOVL DI, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10B CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeSnappyBlockAsm10B CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeSnappyBlockAsm10B MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm10B four_bytes_repeat_emit_encodeSnappyBlockAsm10B: MOVL BP, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm10B three_bytes_repeat_emit_encodeSnappyBlockAsm10B: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm10B two_bytes_repeat_emit_encodeSnappyBlockAsm10B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm10B one_byte_repeat_emit_encodeSnappyBlockAsm10B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm10B: LEAQ (AX)(DI*1), BP NOP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_tail: TESTQ DI, DI JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8 CMPQ DI, 
$0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (AX) MOVB R8, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4: MOVL (R8), R9 MOVL R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (AX) MOVB R8, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (AX) MOVL R8, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R8 MOVQ R9, (AX) MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 
MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(DI*1) MOVOU X9, -112(AX)(DI*1) MOVOU X10, -96(AX)(DI*1) MOVOU X11, -80(AX)(DI*1) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) 
MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ DI, $0x00000100 LEAQ 256(R8), R8 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_tail memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_loop_repeat_extend: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_single_loopback_repeat_extend: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeSnappyBlockAsm10B: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10B: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10B four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B: TESTL BP, BP JZ repeat_end_emit_encodeSnappyBlockAsm10B MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX 
JMP repeat_end_emit_encodeSnappyBlockAsm10B two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm10B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm10B: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm10B JMP search_loop_encodeSnappyBlockAsm10B no_repeat_found_encodeSnappyBlockAsm10B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm10B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm10B MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm10B MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm10B candidate3_match_encodeSnappyBlockAsm10B: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm10B candidate2_match_encodeSnappyBlockAsm10B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm10B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm10B match_extend_back_loop_encodeSnappyBlockAsm10B: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm10B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm10B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm10B JMP 
match_extend_back_loop_encodeSnappyBlockAsm10B match_extend_back_end_encodeSnappyBlockAsm10B: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm10B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeSnappyBlockAsm10B CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm10B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm10B CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeSnappyBlockAsm10B CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeSnappyBlockAsm10B MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeSnappyBlockAsm10B four_bytes_match_emit_encodeSnappyBlockAsm10B: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeSnappyBlockAsm10B three_bytes_match_emit_encodeSnappyBlockAsm10B: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeSnappyBlockAsm10B two_bytes_match_emit_encodeSnappyBlockAsm10B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeSnappyBlockAsm10B one_byte_match_emit_encodeSnappyBlockAsm10B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm10B: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm10B CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_5through7 JE 
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B 
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) 
MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_tail memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm10B: match_nolit_loop_encodeSnappyBlockAsm10B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm10B matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_loop_match_nolit_encodeSnappyBlockAsm10B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B matchlen_single_match_nolit_encodeSnappyBlockAsm10B: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm10B matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm10B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B match_nolit_end_encodeSnappyBlockAsm10B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeSnappyBlockAsm10B four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10B: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL 
four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10B four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm10B MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm10B CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm10B: MOVQ -2(DX)(CX*1), SI MOVQ $0x000000cf1bbcdcbb, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x18, DI IMULQ BP, DI SHRQ $0x36, DI SHLQ $0x18, R8 IMULQ BP, R8 SHRQ $0x36, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm10B INCL CX JMP search_loop_encodeSnappyBlockAsm10B emit_remainder_encodeSnappyBlockAsm10B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET 
emit_remainder_ok_encodeSnappyBlockAsm10B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeSnappyBlockAsm10B CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeSnappyBlockAsm10B MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm10B four_bytes_emit_remainder_encodeSnappyBlockAsm10B: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm10B three_bytes_emit_remainder_encodeSnappyBlockAsm10B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm10B two_bytes_emit_remainder_encodeSnappyBlockAsm10B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm10B one_byte_emit_remainder_encodeSnappyBlockAsm10B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm10B: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8 CMPQ BX, $0x10 JBE 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_256through2048 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU 
X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_tail memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B: MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET
// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// encodeSnappyBlockAsm8B encodes src into dst and returns the number of bytes
// written (final AX - dst_base). It returns 0 as soon as the output would reach
// the limit kept in 0(SP), which is dst_base + src_len - src_len>>5 - 5.
//
// Frame layout ($1048 bytes):
//   0(SP)   8 bytes     dst limit pointer (see above)
//   8(SP)   4 bytes     src_len - 8; searching stops once the position reaches it
//   12(SP)  4 bytes     src offset of the first byte that has not been emitted yet
//   16(SP)  4 bytes     offset of the most recent match ("repeat"), initially 1
//   20(SP)  4 bytes     src offset to continue at when no candidate matches
//   24(SP)  1024 bytes  table of 256 32-bit src offsets, indexed by an 8-bit hash
//
// Register roles in the main loop: AX = dst write pointer, DX = src base,
// CX = current src offset. Everything else is scratch and is re-purposed per section.
//
// The tag bytes written below (literal tags 0xf0/0xf4/0xf8/0xfc, copy tags
// 0xee/0xff and the computed short forms) look like the Snappy block format
// element tags - NOTE(review): confirm against the format description.
//
// NOTE(review): 8 bytes are loaded at src[CX] (CX starts at 1) before any length
// check, so the caller presumably guarantees a minimum src length - confirm
// against the Go wrapper.
// NOTE(review): lengths, offsets and pointers are compared with signed condition
// codes (JL/JLT/JLE/JGE/JGT); that matches an unsigned compare only while the
// operands have their sign bit clear.
// NOTE: this file is generated (see the file header); lasting comment changes
// belong in the generator.
TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	// Clear the table at 24(SP): 8 iterations x 128 bytes = 1024 bytes.
	MOVQ $0x00000008, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm8B
	// Initialise the frame slots described in the header.
	MOVL $0x00000000, 12(SP)  // nothing emitted yet
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)  // 8(SP) = src_len - 8
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)  // 0(SP) = dst_base + src_len - 5 - src_len>>5
	MOVL $0x00000001, CX  // start searching at src offset 1
	MOVL CX, 16(SP)  // initial repeat offset = 1
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm8B:
	// SI = 8 bytes at src[CX]. BP = CX + 4 + (CX - 12(SP))>>4 is the position to
	// continue at if nothing matches here, so the stride grows with the number of
	// bytes not yet emitted.
	MOVQ (DX)(CX*1), SI
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x04, BP
	LEAL 4(CX)(BP*1), BP
	MOVL 8(SP), DI
	CMPL BP, DI
	JGT emit_remainder_encodeSnappyBlockAsm8B  // next position > src_len-8: stop searching
	MOVL BP, 20(SP)
	// Hash the 4-byte windows at CX (R9) and CX+1 (R10): SHLQ $0x20 discards the
	// upper 4 bytes, the product with 0x9e3779b1 is reduced to its top 8 bits.
	MOVQ $0x9e3779b1, R8
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x38, R9
	SHLQ $0x20, R10
	IMULQ R8, R10
	SHRQ $0x38, R10
	// BP, DI = previous table entries (match candidates) for the two hashes;
	// the entries are replaced with CX and CX+1.
	MOVL 24(SP)(R9*4), BP
	MOVL 24(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4)
	// R9 = hash of the window at CX+2, used in no_repeat_found below.
	MOVQ SI, R9
	SHRQ $0x10, R9
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x38, R9
	// Repeat check: compare the 4 bytes at src[CX+1-repeat] with the 4 bytes at src[CX+1].
	MOVL CX, R8
	SUBL 16(SP), R8
	MOVL 1(DX)(R8*1), R10
	MOVQ SI, R8
	SHRQ $0x08, R8
	CMPL R8, R10
	JNE no_repeat_found_encodeSnappyBlockAsm8B
	// Repeat found. SI = CX+1 (match start), BP = 12(SP), DI = SI - repeat (reference position).
	LEAL 1(CX), SI
	MOVL 12(SP), BP
	MOVL SI, DI
	SUBL 16(SP), DI
	JZ repeat_extend_back_end_encodeSnappyBlockAsm8B

repeat_extend_back_loop_encodeSnappyBlockAsm8B:
	// Move the match start backwards while SI > 12(SP), the preceding bytes agree
	// and the reference position has not reached 0.
	CMPL SI, BP
	JLE repeat_extend_back_end_encodeSnappyBlockAsm8B
	MOVB -1(DX)(DI*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE repeat_extend_back_end_encodeSnappyBlockAsm8B
	LEAL -1(SI), SI
	DECL DI
	JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B

repeat_extend_back_end_encodeSnappyBlockAsm8B:
	// Emit src[12(SP):SI] as a literal; skipped when that range is empty.
	// DI = length, BP = length-1, R8 = pointer to the first literal byte.
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
	MOVL SI, DI
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R8
	SUBL BP, DI
	MOVL DI, BP
	SUBL $0x01, BP
	JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
	// Choose the literal header by the size of length-1.
	CMPL BP, $0x3c
	JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B
	CMPL BP, $0x00000100
	JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B
	CMPL BP, $0x00010000
	JLT three_bytes_repeat_emit_encodeSnappyBlockAsm8B
	CMPL BP, $0x01000000
	JLT four_bytes_repeat_emit_encodeSnappyBlockAsm8B
	// length-1 >= 1<<24: tag 0xfc followed by the 32-bit length-1.
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_repeat_emit_encodeSnappyBlockAsm8B

four_bytes_repeat_emit_encodeSnappyBlockAsm8B:
	// Tag 0xf8 followed by the low 24 bits of length-1 (16-bit store + 1 byte).
	MOVL BP, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_repeat_emit_encodeSnappyBlockAsm8B

three_bytes_repeat_emit_encodeSnappyBlockAsm8B:
	// Tag 0xf4 followed by the 16-bit length-1.
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_repeat_emit_encodeSnappyBlockAsm8B

two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
	// Tag 0xf0 followed by the 8-bit length-1.
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	JMP memmove_repeat_emit_encodeSnappyBlockAsm8B

one_byte_repeat_emit_encodeSnappyBlockAsm8B:
	// length-1 < 60: single tag byte (length-1)<<2.
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm8B:
	// Copy DI bytes from (R8) to (AX). BP = AX + DI is the dst pointer after the
	// copy and is moved into AX at memmove_end_copy.
	LEAQ (AX)(DI*1), BP
	NOP

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_tail:
	// Dispatch on the byte count in DI to a size-specialised copy.
	TESTQ DI, DI
	JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
	CMPQ DI, $0x02
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2
	CMPQ DI, $0x04
	JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4
	CMPQ DI, $0x08
	JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_5through7
	JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
	CMPQ DI, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_9through16
	CMPQ DI, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
	CMPQ DI, $0x40
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
	CMPQ DI, $0x80
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_65through128
	CMPQ DI, $0x00000100
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_129through256
	JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2:
	// The ranged cases load a head and a tail chunk (which may overlap) before
	// storing either, so one code path covers every length in its range.
	MOVB (R8), R9
	MOVB -1(R8)(DI*1), R8
	MOVB R9, (AX)
	MOVB R8, -1(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4:
	MOVL (R8), R9
	MOVL R9, (AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3:
	MOVW (R8), R9
	MOVB 2(R8), R8
	MOVW R9, (AX)
	MOVB R8, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_5through7:
	MOVL (R8), R9
	MOVL -4(R8)(DI*1), R8
	MOVL R9, (AX)
	MOVL R8, -4(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
	MOVQ (R8), R9
	MOVQ R9, (AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_9through16:
	MOVQ (R8), R9
	MOVQ -8(R8)(DI*1), R8
	MOVQ R9, (AX)
	MOVQ R8, -8(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (R8), X0
	MOVOU -16(R8)(DI*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_65through128:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU 32(R8), X2
	MOVOU 48(R8), X3
	MOVOU -64(R8)(DI*1), X12
	MOVOU -48(R8)(DI*1), X13
	MOVOU -32(R8)(DI*1), X14
	MOVOU -16(R8)(DI*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(DI*1)
	MOVOU X13, -48(AX)(DI*1)
	MOVOU X14, -32(AX)(DI*1)
	MOVOU X15, -16(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_129through256:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU 32(R8), X2
	MOVOU 48(R8), X3
	MOVOU 64(R8), X4
	MOVOU 80(R8), X5
	MOVOU 96(R8), X6
	MOVOU 112(R8), X7
	MOVOU -128(R8)(DI*1), X8
	MOVOU -112(R8)(DI*1), X9
	MOVOU -96(R8)(DI*1), X10
	MOVOU -80(R8)(DI*1), X11
	MOVOU -64(R8)(DI*1), X12
	MOVOU -48(R8)(DI*1), X13
	MOVOU -32(R8)(DI*1), X14
	MOVOU -16(R8)(DI*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(DI*1)
	MOVOU X9, -112(AX)(DI*1)
	MOVOU X10, -96(AX)(DI*1)
	MOVOU X11, -80(AX)(DI*1)
	MOVOU X12, -64(AX)(DI*1)
	MOVOU X13, -48(AX)(DI*1)
	MOVOU X14, -32(AX)(DI*1)
	MOVOU X15, -16(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048:
	// Copy 256 bytes per iteration while at least 256 remain, then return to the
	// dispatcher with the rest. The two LEAQ after the CMPQ leave the flags intact
	// for the JGE.
	LEAQ -256(DI), DI
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU 32(R8), X2
	MOVOU 48(R8), X3
	MOVOU 64(R8), X4
	MOVOU 80(R8), X5
	MOVOU 96(R8), X6
	MOVOU 112(R8), X7
	MOVOU 128(R8), X8
	MOVOU 144(R8), X9
	MOVOU 160(R8), X10
	MOVOU 176(R8), X11
	MOVOU 192(R8), X12
	MOVOU 208(R8), X13
	MOVOU 224(R8), X14
	MOVOU 240(R8), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ DI, $0x00000100
	LEAQ 256(R8), R8
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048
	JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_tail

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
	MOVQ BP, AX  // AX = dst pointer just past the literal

emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
	// CX += 5: past the 4 bytes already verified at the old CX+1. Count further
	// matching bytes into R10; R8 = src+CX, BP = src+CX-repeat, DI = bytes left in src.
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(BP*1), BP
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_repeat_extend

matchlen_loopback_repeat_extend:
	// Compare 8 bytes at a time; on a difference, BSFQ>>3 is the number of equal
	// low-order bytes.
	MOVQ (R8)(R10*1), R9
	XORQ (BP)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_repeat_extend
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B

matchlen_loop_repeat_extend:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_repeat_extend

matchlen_single_repeat_extend:
	// Fewer than 8 bytes left: compare one byte at a time.
	TESTL DI, DI
	JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B

matchlen_single_loopback_repeat_extend:
	MOVB (R8)(R10*1), R9
	CMPB (BP)(R10*1), R9
	JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_repeat_extend

repeat_extend_forward_end_encodeSnappyBlockAsm8B:
	// CX = end of the match, BP = CX - SI = match length, SI = offset (16(SP)).
	// Offsets below 65536 take the 2-byte-offset encodings.
	ADDL R10, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	CMPL SI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B

four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8B:
	// Length > 64: write tag 0xff + 32-bit offset and take 64 off the length.
	CMPL BP, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B
	JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8B

four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B:
	// Remaining length (if any): tag byte = low 8 bits of 4*(length-1)+3, then the
	// 32-bit offset. Only BL is set before the LEAL, so the upper bits of the
	// result come from stale BX; only the low byte is stored.
	TESTL BP, BP
	JZ repeat_end_emit_encodeSnappyBlockAsm8B
	MOVB $0x03, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm8B

two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
	// Length > 64: write tag 0xee + 16-bit offset and take 60 off the length.
	CMPL BP, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX
	JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
	// Length < 12 and offset < 2048: 2-byte form. Tag byte = low 8 bits of
	// (4*(length-4)+1) | (offset>>8)<<5, followed by the low offset byte.
	CMPL BP, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
	CMPL SI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm8B

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
	// 3-byte form: tag byte = low 8 bits of 4*(length-1)+2, then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm8B:
	// Everything up to CX is emitted. Finish if CX >= src_len-8, else keep searching.
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm8B
	JMP search_loop_encodeSnappyBlockAsm8B

no_repeat_found_encodeSnappyBlockAsm8B:
	// Try three candidates in turn, comparing 4 bytes each:
	//   BP (old entry for the hash at CX)   against the window at CX,
	//   DI (old entry for the hash at CX+1) against the window at CX+1,
	//   the entry for the hash at CX+2      against the window at CX+2.
	// The CX+2 entry is replaced with CX+2 after it has been read.
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeSnappyBlockAsm8B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ candidate2_match_encodeSnappyBlockAsm8B
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ candidate3_match_encodeSnappyBlockAsm8B
	MOVL 20(SP), CX  // nothing matched: continue at the position saved in 20(SP)
	JMP search_loop_encodeSnappyBlockAsm8B

candidate3_match_encodeSnappyBlockAsm8B:
	ADDL $0x02, CX  // match starts at CX+2; BP already holds its candidate
	JMP candidate_match_encodeSnappyBlockAsm8B

candidate2_match_encodeSnappyBlockAsm8B:
	MOVL R8, 24(SP)(R9*4)  // still record CX+2 in the table
	INCL CX  // match starts at CX+1
	MOVL DI, BP  // BP = candidate position

candidate_match_encodeSnappyBlockAsm8B:
	// CX = match start, BP = candidate position, SI = 12(SP).
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeSnappyBlockAsm8B

match_extend_back_loop_encodeSnappyBlockAsm8B:
	// Extend the match backwards while CX > 12(SP), the preceding bytes agree and
	// the candidate position has not reached 0.
	CMPL CX, SI
	JLE match_extend_back_end_encodeSnappyBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeSnappyBlockAsm8B
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeSnappyBlockAsm8B
	JMP match_extend_back_loop_encodeSnappyBlockAsm8B

match_extend_back_end_encodeSnappyBlockAsm8B:
	// Return 0 unless AX + 4 + (CX - 12(SP)) is below the dst limit.
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBlockAsm8B:
	// Emit src[12(SP):CX] as a literal; skipped when that range is empty.
	// R8 = length, DI = length-1, SI = pointer to the first literal byte.
	// BP (candidate position) is not touched by this section.
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	MOVL R8, DI
	SUBL $0x01, DI
	JC emit_literal_done_match_emit_encodeSnappyBlockAsm8B
	// Literal header selection, same thresholds and tags as in the repeat path.
	CMPL DI, $0x3c
	JLT one_byte_match_emit_encodeSnappyBlockAsm8B
	CMPL DI, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBlockAsm8B
	CMPL DI, $0x00010000
	JLT three_bytes_match_emit_encodeSnappyBlockAsm8B
	CMPL DI, $0x01000000
	JLT four_bytes_match_emit_encodeSnappyBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_match_emit_encodeSnappyBlockAsm8B

four_bytes_match_emit_encodeSnappyBlockAsm8B:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_match_emit_encodeSnappyBlockAsm8B

three_bytes_match_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_match_emit_encodeSnappyBlockAsm8B

two_bytes_match_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	JMP memmove_match_emit_encodeSnappyBlockAsm8B

one_byte_match_emit_encodeSnappyBlockAsm8B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBlockAsm8B:
	// Copy R8 bytes from (SI) to (AX). DI = AX + R8 becomes the new AX at
	// memmove_end_copy. Same structure as the copy in the repeat path.
	LEAQ (AX)(R8*1), DI
	NOP

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_tail:
	TESTQ R8, R8
	JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
	CMPQ R8, $0x02
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2
	CMPQ R8, $0x04
	JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_5through7
	JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_9through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
	CMPQ R8, $0x40
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
	CMPQ R8, $0x80
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_65through128
	CMPQ R8, $0x00000100
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_129through256
	JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4:
	MOVL (SI), R9
	MOVL R9, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_5through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
	MOVQ (SI), R9
	MOVQ R9, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_9through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_65through128:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU -64(SI)(R8*1), X12
	MOVOU -48(SI)(R8*1), X13
	MOVOU -32(SI)(R8*1), X14
	MOVOU -16(SI)(R8*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(R8*1)
	MOVOU X13, -48(AX)(R8*1)
	MOVOU X14, -32(AX)(R8*1)
	MOVOU X15, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_129through256:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	MOVOU -128(SI)(R8*1), X8
	MOVOU -112(SI)(R8*1), X9
	MOVOU -96(SI)(R8*1), X10
	MOVOU -80(SI)(R8*1), X11
	MOVOU -64(SI)(R8*1), X12
	MOVOU -48(SI)(R8*1), X13
	MOVOU -32(SI)(R8*1), X14
	MOVOU -16(SI)(R8*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(R8*1)
	MOVOU X9, -112(AX)(R8*1)
	MOVOU X10, -96(AX)(R8*1)
	MOVOU X11, -80(AX)(R8*1)
	MOVOU X12, -64(AX)(R8*1)
	MOVOU X13, -48(AX)(R8*1)
	MOVOU X14, -32(AX)(R8*1)
	MOVOU X15, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048:
	// 256 bytes per iteration while at least 256 remain; the rest goes back to the dispatcher.
	LEAQ -256(R8), R8
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	MOVOU 128(SI), X8
	MOVOU 144(SI), X9
	MOVOU 160(SI), X10
	MOVOU 176(SI), X11
	MOVOU 192(SI), X12
	MOVOU 208(SI), X13
	MOVOU 224(SI), X14
	MOVOU 240(SI), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ R8, $0x00000100
	LEAQ 256(SI), SI
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_256through2048
	JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_tail

memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
	MOVQ DI, AX  // AX = dst pointer just past the literal

emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
match_nolit_loop_encodeSnappyBlockAsm8B:
	// 16(SP) = CX - BP is the match offset. Skip the 4 bytes already known to be
	// equal and count further equal bytes into R9; DI = src+CX, BP = src+candidate,
	// SI = bytes left in src.
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBlockAsm8B

matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B:
	// 8 bytes at a time; BSFQ>>3 of the XOR is the number of equal low-order bytes.
	MOVQ (DI)(R9*1), R8
	XORQ (BP)(R9*1), R8
	TESTQ R8, R8
	JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B
	BSFQ R8, R8
	SARQ $0x03, R8
	LEAL (R9)(R8*1), R9
	JMP match_nolit_end_encodeSnappyBlockAsm8B

matchlen_loop_match_nolit_encodeSnappyBlockAsm8B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B

matchlen_single_match_nolit_encodeSnappyBlockAsm8B:
	TESTL SI, SI
	JZ match_nolit_end_encodeSnappyBlockAsm8B

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE match_nolit_end_encodeSnappyBlockAsm8B
	LEAL 1(R9), R9
	DECL SI
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B

match_nolit_end_encodeSnappyBlockAsm8B:
	// CX = end of the match, BP = offset, R9 = total match length (extension + 4).
	// The copy is encoded exactly as in the repeat path, with R9/BP in place of BP/SI.
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL two_byte_offset_match_nolit_encodeSnappyBlockAsm8B

four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8B:
	CMPL R9, $0x40
	JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B
	JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8B

four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B:
	TESTL R9, R9
	JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
	MOVB $0x03, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B

two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
	CMPL R9, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
	CMPL R9, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
	CMPL BP, $0x00000800
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B

emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
	// Everything up to CX is emitted. Finish if CX >= src_len-8; return 0 if AX
	// has reached the dst limit.
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm8B
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBlockAsm8B:
	// Load 8 bytes at CX-2. Record CX-2 under the hash of the window at CX-2, and
	// swap CX into the entry for the window at CX (old entry -> BP). If the 4 bytes
	// at the old entry equal the window at CX, continue with another match right
	// away; otherwise resume searching at CX+1.
	MOVQ -2(DX)(CX*1), SI
	MOVQ $0x9e3779b1, BP
	MOVQ SI, DI
	SHRQ $0x10, SI
	MOVQ SI, R8
	SHLQ $0x20, DI
	IMULQ BP, DI
	SHRQ $0x38, DI
	SHLQ $0x20, R8
	IMULQ BP, R8
	SHRQ $0x38, R8
	LEAL -2(CX), R9
	MOVL 24(SP)(R8*4), BP
	MOVL R9, 24(SP)(DI*4)
	MOVL CX, 24(SP)(R8*4)
	CMPL (DX)(BP*1), SI
	JEQ match_nolit_loop_encodeSnappyBlockAsm8B
	INCL CX
	JMP search_loop_encodeSnappyBlockAsm8B

emit_remainder_encodeSnappyBlockAsm8B:
	// Return 0 unless AX + 4 + (src_len - 12(SP)) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm8B:
	// Emit src[12(SP):src_len] as the final literal; skipped when empty. The main
	// loop is over, so CX and DX are re-purposed here: BP = length, DX = length-1,
	// CX = pointer to the first literal byte.
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	MOVL BP, DX
	SUBL $0x01, DX
	JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
	// Literal header selection, same thresholds and tags as in the repeat path.
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeSnappyBlockAsm8B
	CMPL DX, $0x01000000
	JLT four_bytes_emit_remainder_encodeSnappyBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_emit_remainder_encodeSnappyBlockAsm8B

four_bytes_emit_remainder_encodeSnappyBlockAsm8B:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_emit_remainder_encodeSnappyBlockAsm8B

three_bytes_emit_remainder_encodeSnappyBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_emit_remainder_encodeSnappyBlockAsm8B

two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	JMP memmove_emit_remainder_encodeSnappyBlockAsm8B

one_byte_emit_remainder_encodeSnappyBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBlockAsm8B:
	// Copy BX (= BP) bytes from (CX) to (AX). DX = AX + BP becomes the new AX at
	// memmove_end_copy. Same structure as the copy in the repeat path.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	NOP

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_tail:
	TESTQ BX, BX
	JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
	CMPQ BX, $0x02
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2
	CMPQ BX, $0x04
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_5through7
	JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_9through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
	CMPQ BX, $0x40
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
	CMPQ BX, $0x80
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_65through128
	CMPQ BX, $0x00000100
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_129through256
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_256through2048

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4:
	MOVL (CX), BP
	MOVL BP, (AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_5through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8:
	MOVQ (CX), BP
	MOVQ BP, (AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_9through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_65through128:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(BX*1), X12
	MOVOU -48(CX)(BX*1), X13
	MOVOU -32(CX)(BX*1), X14
	MOVOU -16(CX)(BX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(BX*1)
	MOVOU X13, -48(AX)(BX*1)
	MOVOU X14, -32(AX)(BX*1)
	MOVOU X15, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_129through256:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(BX*1), X8
	MOVOU -112(CX)(BX*1), X9
	MOVOU -96(CX)(BX*1), X10
	MOVOU -80(CX)(BX*1), X11
	MOVOU -64(CX)(BX*1), X12
	MOVOU -48(CX)(BX*1), X13
	MOVOU -32(CX)(BX*1), X14
	MOVOU -16(CX)(BX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(BX*1)
	MOVOU X9, -112(AX)(BX*1)
	MOVOU X10, -96(AX)(BX*1)
	MOVOU X11, -80(AX)(BX*1)
	MOVOU X12, -64(AX)(BX*1)
	MOVOU X13, -48(AX)(BX*1)
	MOVOU X14, -32(AX)(BX*1)
	MOVOU X15, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_256through2048:
	// 256 bytes per iteration while at least 256 remain; the rest goes back to the dispatcher.
	LEAQ -256(BX), BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU 128(CX), X8
	MOVOU 144(CX), X9
	MOVOU 160(CX), X10
	MOVOU 176(CX), X11
	MOVOU 192(CX), X12
	MOVOU 208(CX), X13
	MOVOU 224(CX), X14
	MOVOU 240(CX), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ BX, $0x00000100
	LEAQ 256(CX), CX
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_256through2048
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_tail

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
	MOVQ DX, AX  // AX = dst pointer just past the final literal

emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsmAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeSnappyBlockAsmAvx(SB), $65560-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000200, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsmAvx: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsmAvx MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsmAvx: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x06, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeSnappyBlockAsmAvx MOVL BP, 20(SP) MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 SHLQ $0x10, R10 IMULQ R8, R10 SHRQ $0x32, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsmAvx LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsmAvx repeat_extend_back_loop_encodeSnappyBlockAsmAvx: CMPL SI, BP JLE
repeat_extend_back_end_encodeSnappyBlockAsmAvx MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsmAvx LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsmAvx repeat_extend_back_end_encodeSnappyBlockAsmAvx: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsmAvx MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI MOVL DI, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeSnappyBlockAsmAvx CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsmAvx CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsmAvx CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeSnappyBlockAsmAvx CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeSnappyBlockAsmAvx MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeSnappyBlockAsmAvx four_bytes_repeat_emit_encodeSnappyBlockAsmAvx: MOVL BP, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeSnappyBlockAsmAvx three_bytes_repeat_emit_encodeSnappyBlockAsmAvx: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeSnappyBlockAsmAvx two_bytes_repeat_emit_encodeSnappyBlockAsmAvx: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeSnappyBlockAsmAvx one_byte_repeat_emit_encodeSnappyBlockAsmAvx: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsmAvx: LEAQ (AX)(DI*1), BP NOP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_tail: TESTQ DI, DI JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_5through7 JE 
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_4: MOVL (R8), R9 MOVL R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx 
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(DI*1) MOVOU X9, -112(AX)(DI*1) MOVOU X10, -96(AX)(DI*1) MOVOU X11, -80(AX)(DI*1) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 
32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ DI, $0x00000100 LEAQ 256(R8), R8 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_avxUnaligned: LEAQ (R8)(DI*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, DI MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (R8), Y4 ADDQ R11, R8 SUBQ R9, DI emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (R8), Y0 VMOVDQU 32(R8), Y1 VMOVDQU 64(R8), Y2 VMOVDQU 96(R8), Y3 ADDQ R9, R8 VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, DI JA emit_lit_memmove_repeat_emit_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop ADDQ R9, DI ADDQ AX, DI VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(DI) MOVOU X6, -112(DI) MOVOU X7, -96(DI) MOVOU X8, -80(DI) MOVOU X9, -64(DI) MOVOU X10, -48(DI) MOVOU X11, -32(DI) MOVOU X12, -16(DI) memmove_end_copy_repeat_emit_encodeSnappyBlockAsmAvx: MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsmAvx: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsmAvx matchlen_loop_repeat_extend: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, 
$0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsmAvx matchlen_single_loopback_repeat_extend: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsmAvx LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeSnappyBlockAsmAvx: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsmAvx four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsmAvx: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsmAvx MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsmAvx JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsmAvx four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsmAvx: TESTL BP, BP JZ repeat_end_emit_encodeSnappyBlockAsmAvx MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeSnappyBlockAsmAvx two_byte_offset_repeat_as_copy_encodeSnappyBlockAsmAvx: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsmAvx MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsmAvx two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsmAvx: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsmAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsmAvx MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsmAvx emit_copy_three_repeat_as_copy_encodeSnappyBlockAsmAvx: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsmAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsmAvx 
JMP search_loop_encodeSnappyBlockAsmAvx no_repeat_found_encodeSnappyBlockAsmAvx: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsmAvx SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsmAvx MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsmAvx MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsmAvx candidate3_match_encodeSnappyBlockAsmAvx: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsmAvx candidate2_match_encodeSnappyBlockAsmAvx: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsmAvx: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsmAvx match_extend_back_loop_encodeSnappyBlockAsmAvx: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsmAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsmAvx LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsmAvx JMP match_extend_back_loop_encodeSnappyBlockAsmAvx match_extend_back_end_encodeSnappyBlockAsmAvx: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsmAvx: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsmAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeSnappyBlockAsmAvx CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsmAvx CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsmAvx CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeSnappyBlockAsmAvx CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeSnappyBlockAsmAvx MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeSnappyBlockAsmAvx four_bytes_match_emit_encodeSnappyBlockAsmAvx: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, 
(AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeSnappyBlockAsmAvx three_bytes_match_emit_encodeSnappyBlockAsmAvx: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeSnappyBlockAsmAvx two_bytes_match_emit_encodeSnappyBlockAsmAvx: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeSnappyBlockAsmAvx one_byte_match_emit_encodeSnappyBlockAsmAvx: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsmAvx: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx 
emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU 
-96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_tail emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI VMOVDQA Y0, (AX) 
VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ AX, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_match_emit_encodeSnappyBlockAsmAvx: MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsmAvx: match_nolit_loop_encodeSnappyBlockAsmAvx: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsmAvx matchlen_loopback_match_nolit_encodeSnappyBlockAsmAvx: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsmAvx BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsmAvx matchlen_loop_match_nolit_encodeSnappyBlockAsmAvx: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsmAvx matchlen_single_match_nolit_encodeSnappyBlockAsmAvx: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsmAvx matchlen_single_loopback_match_nolit_encodeSnappyBlockAsmAvx: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsmAvx LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsmAvx match_nolit_end_encodeSnappyBlockAsmAvx: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeSnappyBlockAsmAvx four_bytes_loop_back_match_nolit_encodeSnappyBlockAsmAvx: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsmAvx MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeSnappyBlockAsmAvx JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsmAvx 
four_bytes_remain_match_nolit_encodeSnappyBlockAsmAvx: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsmAvx MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsmAvx two_byte_offset_match_nolit_encodeSnappyBlockAsmAvx: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsmAvx MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsmAvx two_byte_offset_short_match_nolit_encodeSnappyBlockAsmAvx: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsmAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsmAvx MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsmAvx emit_copy_three_match_nolit_encodeSnappyBlockAsmAvx: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsmAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsmAvx CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsmAvx: MOVQ -2(DX)(CX*1), SI MOVQ $0x0000cf1bbcdcbf9b, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x10, DI IMULQ BP, DI SHRQ $0x32, DI SHLQ $0x10, R8 IMULQ BP, R8 SHRQ $0x32, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsmAvx INCL CX JMP search_loop_encodeSnappyBlockAsmAvx emit_remainder_encodeSnappyBlockAsmAvx: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsmAvx: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ 
emit_literal_done_emit_remainder_encodeSnappyBlockAsmAvx MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeSnappyBlockAsmAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsmAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsmAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeSnappyBlockAsmAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeSnappyBlockAsmAvx MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeSnappyBlockAsmAvx four_bytes_emit_remainder_encodeSnappyBlockAsmAvx: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeSnappyBlockAsmAvx three_bytes_emit_remainder_encodeSnappyBlockAsmAvx: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeSnappyBlockAsmAvx two_bytes_emit_remainder_encodeSnappyBlockAsmAvx: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeSnappyBlockAsmAvx one_byte_emit_remainder_encodeSnappyBlockAsmAvx: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsmAvx: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), SI MOVB BP, (AX) MOVB SI, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_3: MOVW (CX), BP MOVB 2(CX), SI MOVW BP, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), SI MOVL BP, (AX) MOVL SI, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), SI MOVQ BP, (AX) MOVQ SI, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 
MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 
160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_avxUnaligned: LEAQ (CX)(BX*1), SI MOVQ AX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 MOVQ $0x00000080, BP ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ AX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (CX), Y4 ADDQ DI, CX SUBQ BP, BX emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (CX), Y0 VMOVDQU 32(CX), Y1 VMOVDQU 64(CX), Y2 VMOVDQU 96(CX), Y3 ADDQ BP, CX VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ BP, AX SUBQ BP, BX JA emit_lit_memmove_emit_remainder_encodeSnappyBlockAsmAvx_memmove_gobble_128_loop ADDQ BP, BX ADDQ AX, BX VMOVDQU Y4, (R8) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) memmove_end_copy_emit_remainder_encodeSnappyBlockAsmAvx: MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsmAvx: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeSnappyBlockAsm12BAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeSnappyBlockAsm12BAvx(SB), $16408-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000080, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm12BAvx: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm12BAvx MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, 
CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm12BAvx: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeSnappyBlockAsm12BAvx MOVL BP, 20(SP) MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 SHLQ $0x18, R10 IMULQ R8, R10 SHRQ $0x34, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm12BAvx LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm12BAvx repeat_extend_back_loop_encodeSnappyBlockAsm12BAvx: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm12BAvx MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm12BAvx LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12BAvx repeat_extend_back_end_encodeSnappyBlockAsm12BAvx: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12BAvx MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI MOVL DI, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm12BAvx CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm12BAvx CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm12BAvx four_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx: MOVL BP, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW BP, 
1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm12BAvx three_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm12BAvx two_bytes_repeat_emit_encodeSnappyBlockAsm12BAvx: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm12BAvx one_byte_repeat_emit_encodeSnappyBlockAsm12BAvx: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm12BAvx: LEAQ (AX)(DI*1), BP NOP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_tail: TESTQ DI, DI JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_4: MOVL 
(R8), R9 MOVL R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 
48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(DI*1) MOVOU X9, -112(AX)(DI*1) MOVOU X10, -96(AX)(DI*1) MOVOU X11, -80(AX)(DI*1) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ DI, $0x00000100 LEAQ 256(R8), R8 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (R8)(DI*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, DI MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (R8), Y4 ADDQ R11, R8 SUBQ R9, DI 
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (R8), Y0 VMOVDQU 32(R8), Y1 VMOVDQU 64(R8), Y2 VMOVDQU 96(R8), Y3 ADDQ R9, R8 VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, DI JA emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop ADDQ R9, DI ADDQ AX, DI VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(DI) MOVOU X6, -112(DI) MOVOU X7, -96(DI) MOVOU X8, -80(DI) MOVOU X9, -64(DI) MOVOU X10, -48(DI) MOVOU X11, -32(DI) MOVOU X12, -16(DI) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12BAvx: MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12BAvx: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12BAvx matchlen_loop_repeat_extend: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm12BAvx matchlen_single_loopback_repeat_extend: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12BAvx LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeSnappyBlockAsm12BAvx: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12BAvx four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12BAvx: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12BAvx MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12BAvx JMP 
four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12BAvx four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12BAvx: TESTL BP, BP JZ repeat_end_emit_encodeSnappyBlockAsm12BAvx MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeSnappyBlockAsm12BAvx two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12BAvx: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12BAvx MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12BAvx two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12BAvx: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12BAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12BAvx MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm12BAvx emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12BAvx: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm12BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm12BAvx JMP search_loop_encodeSnappyBlockAsm12BAvx no_repeat_found_encodeSnappyBlockAsm12BAvx: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm12BAvx SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm12BAvx MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm12BAvx MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm12BAvx candidate3_match_encodeSnappyBlockAsm12BAvx: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm12BAvx candidate2_match_encodeSnappyBlockAsm12BAvx: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm12BAvx: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm12BAvx 
match_extend_back_loop_encodeSnappyBlockAsm12BAvx: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm12BAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm12BAvx LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm12BAvx JMP match_extend_back_loop_encodeSnappyBlockAsm12BAvx match_extend_back_end_encodeSnappyBlockAsm12BAvx: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm12BAvx: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12BAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeSnappyBlockAsm12BAvx CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm12BAvx CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm12BAvx CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeSnappyBlockAsm12BAvx CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeSnappyBlockAsm12BAvx MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeSnappyBlockAsm12BAvx four_bytes_match_emit_encodeSnappyBlockAsm12BAvx: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeSnappyBlockAsm12BAvx three_bytes_match_emit_encodeSnappyBlockAsm12BAvx: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeSnappyBlockAsm12BAvx two_bytes_match_emit_encodeSnappyBlockAsm12BAvx: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeSnappyBlockAsm12BAvx one_byte_match_emit_encodeSnappyBlockAsm12BAvx: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm12BAvx: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_tail: TESTQ R8, R8 JEQ 
memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx 
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP 
memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_tail emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ AX, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_match_emit_encodeSnappyBlockAsm12BAvx: MOVQ DI, AX 
emit_literal_done_match_emit_encodeSnappyBlockAsm12BAvx: match_nolit_loop_encodeSnappyBlockAsm12BAvx: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm12BAvx matchlen_loopback_match_nolit_encodeSnappyBlockAsm12BAvx: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12BAvx BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm12BAvx matchlen_loop_match_nolit_encodeSnappyBlockAsm12BAvx: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12BAvx matchlen_single_match_nolit_encodeSnappyBlockAsm12BAvx: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm12BAvx matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12BAvx: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm12BAvx LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12BAvx match_nolit_end_encodeSnappyBlockAsm12BAvx: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeSnappyBlockAsm12BAvx four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12BAvx: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm12BAvx MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm12BAvx JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12BAvx four_bytes_remain_match_nolit_encodeSnappyBlockAsm12BAvx: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm12BAvx MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12BAvx two_byte_offset_match_nolit_encodeSnappyBlockAsm12BAvx: CMPL R9, $0x40 JLE 
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12BAvx MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12BAvx two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12BAvx: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12BAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12BAvx MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12BAvx emit_copy_three_match_nolit_encodeSnappyBlockAsm12BAvx: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm12BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm12BAvx CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12BAvx: MOVQ -2(DX)(CX*1), SI MOVQ $0x000000cf1bbcdcbb, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x18, DI IMULQ BP, DI SHRQ $0x34, DI SHLQ $0x18, R8 IMULQ BP, R8 SHRQ $0x34, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm12BAvx INCL CX JMP search_loop_encodeSnappyBlockAsm12BAvx emit_remainder_encodeSnappyBlockAsm12BAvx: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm12BAvx: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12BAvx MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm12BAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm12BAvx CMPL DX, $0x00000100 JLT 
two_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm12BAvx four_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm12BAvx three_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm12BAvx two_bytes_emit_remainder_encodeSnappyBlockAsm12BAvx: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm12BAvx one_byte_emit_remainder_encodeSnappyBlockAsm12BAvx: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm12BAvx: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), SI MOVB BP, (AX) MOVB SI, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_3: MOVW (CX), BP MOVB 2(CX), SI MOVW BP, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), SI MOVL BP, (AX) MOVL SI, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), SI MOVQ BP, (AX) MOVQ SI, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 
LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (CX)(BX*1), SI MOVQ AX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 MOVQ $0x00000080, BP ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ AX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (CX), Y4 ADDQ DI, CX SUBQ BP, BX emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (CX), Y0 VMOVDQU 32(CX), Y1 VMOVDQU 64(CX), Y2 VMOVDQU 96(CX), Y3 ADDQ BP, CX VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ BP, AX SUBQ BP, BX JA emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12BAvx_memmove_gobble_128_loop ADDQ BP, BX ADDQ AX, BX VMOVDQU Y4, (R8) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12BAvx: MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsm12BAvx: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeSnappyBlockAsm10BAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeSnappyBlockAsm10BAvx(SB), $4120-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000020, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm10BAvx: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm10BAvx MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ 
src_base+24(FP), DX search_loop_encodeSnappyBlockAsm10BAvx: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeSnappyBlockAsm10BAvx MOVL BP, 20(SP) MOVQ $0x9e3779b1, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x36, R9 SHLQ $0x20, R10 IMULQ R8, R10 SHRQ $0x36, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x36, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm10BAvx LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm10BAvx repeat_extend_back_loop_encodeSnappyBlockAsm10BAvx: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm10BAvx MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm10BAvx LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10BAvx repeat_extend_back_end_encodeSnappyBlockAsm10BAvx: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10BAvx MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI MOVL DI, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm10BAvx CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm10BAvx CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm10BAvx four_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx: MOVL BP, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm10BAvx 
three_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm10BAvx two_bytes_repeat_emit_encodeSnappyBlockAsm10BAvx: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm10BAvx one_byte_repeat_emit_encodeSnappyBlockAsm10BAvx: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm10BAvx: LEAQ (AX)(DI*1), BP NOP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_tail: TESTQ DI, DI JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_4: MOVL (R8), R9 MOVL R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx 
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 
MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(DI*1) MOVOU X9, -112(AX)(DI*1) MOVOU X10, -96(AX)(DI*1) MOVOU X11, -80(AX)(DI*1) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ DI, $0x00000100 LEAQ 256(R8), R8 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned: LEAQ (R8)(DI*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, DI MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (R8), Y4 ADDQ R11, R8 SUBQ R9, DI emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop: VMOVDQU (R8), Y0 VMOVDQU 
32(R8), Y1 VMOVDQU 64(R8), Y2 VMOVDQU 96(R8), Y3 ADDQ R9, R8 VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, DI JA emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop ADDQ R9, DI ADDQ AX, DI VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(DI) MOVOU X6, -112(DI) MOVOU X7, -96(DI) MOVOU X8, -80(DI) MOVOU X9, -64(DI) MOVOU X10, -48(DI) MOVOU X11, -32(DI) MOVOU X12, -16(DI) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10BAvx: MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm10BAvx: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10BAvx matchlen_loop_repeat_extend: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm10BAvx matchlen_single_loopback_repeat_extend: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10BAvx LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeSnappyBlockAsm10BAvx: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10BAvx four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10BAvx: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10BAvx MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10BAvx JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10BAvx four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10BAvx: 
TESTL BP, BP JZ repeat_end_emit_encodeSnappyBlockAsm10BAvx MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeSnappyBlockAsm10BAvx two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10BAvx: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10BAvx MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10BAvx two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10BAvx: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10BAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10BAvx MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm10BAvx emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10BAvx: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm10BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm10BAvx JMP search_loop_encodeSnappyBlockAsm10BAvx no_repeat_found_encodeSnappyBlockAsm10BAvx: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm10BAvx SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm10BAvx MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm10BAvx MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm10BAvx candidate3_match_encodeSnappyBlockAsm10BAvx: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm10BAvx candidate2_match_encodeSnappyBlockAsm10BAvx: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm10BAvx: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm10BAvx match_extend_back_loop_encodeSnappyBlockAsm10BAvx: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm10BAvx MOVB 
-1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm10BAvx LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm10BAvx JMP match_extend_back_loop_encodeSnappyBlockAsm10BAvx match_extend_back_end_encodeSnappyBlockAsm10BAvx: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm10BAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm10BAvx: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10BAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeSnappyBlockAsm10BAvx CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm10BAvx CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm10BAvx CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeSnappyBlockAsm10BAvx CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeSnappyBlockAsm10BAvx MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeSnappyBlockAsm10BAvx four_bytes_match_emit_encodeSnappyBlockAsm10BAvx: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeSnappyBlockAsm10BAvx three_bytes_match_emit_encodeSnappyBlockAsm10BAvx: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeSnappyBlockAsm10BAvx two_bytes_match_emit_encodeSnappyBlockAsm10BAvx: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeSnappyBlockAsm10BAvx one_byte_match_emit_encodeSnappyBlockAsm10BAvx: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm10BAvx: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_1or2 CMPQ R8, 
$0x04 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(R8*1) JMP 
memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 
32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_tail emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ AX, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_match_emit_encodeSnappyBlockAsm10BAvx: MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm10BAvx: match_nolit_loop_encodeSnappyBlockAsm10BAvx: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI 
LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm10BAvx matchlen_loopback_match_nolit_encodeSnappyBlockAsm10BAvx: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10BAvx BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm10BAvx matchlen_loop_match_nolit_encodeSnappyBlockAsm10BAvx: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10BAvx matchlen_single_match_nolit_encodeSnappyBlockAsm10BAvx: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm10BAvx matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10BAvx: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm10BAvx LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10BAvx match_nolit_end_encodeSnappyBlockAsm10BAvx: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeSnappyBlockAsm10BAvx four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10BAvx: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm10BAvx MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm10BAvx JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10BAvx four_bytes_remain_match_nolit_encodeSnappyBlockAsm10BAvx: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm10BAvx MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10BAvx two_byte_offset_match_nolit_encodeSnappyBlockAsm10BAvx: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10BAvx MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10BAvx two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10BAvx: CMPL R9, $0x0c JGE 
emit_copy_three_match_nolit_encodeSnappyBlockAsm10BAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10BAvx MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10BAvx emit_copy_three_match_nolit_encodeSnappyBlockAsm10BAvx: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm10BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm10BAvx CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm10BAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm10BAvx: MOVQ -2(DX)(CX*1), SI MOVQ $0x9e3779b1, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x20, DI IMULQ BP, DI SHRQ $0x36, DI SHLQ $0x20, R8 IMULQ BP, R8 SHRQ $0x36, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm10BAvx INCL CX JMP search_loop_encodeSnappyBlockAsm10BAvx emit_remainder_encodeSnappyBlockAsm10BAvx: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm10BAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm10BAvx: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10BAvx MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm10BAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm10BAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP 
memmove_emit_remainder_encodeSnappyBlockAsm10BAvx four_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm10BAvx three_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm10BAvx two_bytes_emit_remainder_encodeSnappyBlockAsm10BAvx: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm10BAvx one_byte_emit_remainder_encodeSnappyBlockAsm10BAvx: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm10BAvx: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), SI MOVB BP, (AX) MOVB SI, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_3: MOVW (CX), BP MOVB 2(CX), SI MOVW BP, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), SI MOVL BP, (AX) MOVL SI, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), SI MOVQ BP, (AX) MOVQ SI, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) 
MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU -80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_avxUnaligned: 
LEAQ (CX)(BX*1), SI MOVQ AX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 MOVQ $0x00000080, BP ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ AX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (CX), Y4 ADDQ DI, CX SUBQ BP, BX emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop: VMOVDQU (CX), Y0 VMOVDQU 32(CX), Y1 VMOVDQU 64(CX), Y2 VMOVDQU 96(CX), Y3 ADDQ BP, CX VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ BP, AX SUBQ BP, BX JA emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10BAvx_memmove_gobble_128_loop ADDQ BP, BX ADDQ AX, BX VMOVDQU Y4, (R8) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10BAvx: MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsm10BAvx: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeSnappyBlockAsm8BAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeSnappyBlockAsm8BAvx(SB), $1048-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000008, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm8BAvx: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm8BAvx MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm8BAvx: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x04, BP LEAL 4(CX)(BP*1), BP MOVL 8(SP), DI CMPL BP, DI JGT emit_remainder_encodeSnappyBlockAsm8BAvx MOVL BP, 20(SP) MOVQ $0x9e3779b1, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ 
$0x20, R9 IMULQ R8, R9 SHRQ $0x38, R9 SHLQ $0x20, R10 IMULQ R8, R10 SHRQ $0x38, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x38, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm8BAvx LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm8BAvx repeat_extend_back_loop_encodeSnappyBlockAsm8BAvx: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm8BAvx MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm8BAvx LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8BAvx repeat_extend_back_end_encodeSnappyBlockAsm8BAvx: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8BAvx MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI MOVL DI, BP SUBL $0x01, BP JC emit_literal_done_repeat_emit_encodeSnappyBlockAsm8BAvx CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm8BAvx CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm8BAvx four_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx: MOVL BP, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm8BAvx three_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm8BAvx two_bytes_repeat_emit_encodeSnappyBlockAsm8BAvx: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX JMP memmove_repeat_emit_encodeSnappyBlockAsm8BAvx 
one_byte_repeat_emit_encodeSnappyBlockAsm8BAvx: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm8BAvx: LEAQ (AX)(DI*1), BP NOP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_tail: TESTQ DI, DI JEQ memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_4: MOVL (R8), R9 MOVL R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R10 MOVL R9, (AX) MOVL 
R10, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU 
X8, -128(AX)(DI*1) MOVOU X9, -112(AX)(DI*1) MOVOU X10, -96(AX)(DI*1) MOVOU X11, -80(AX)(DI*1) MOVOU X12, -64(AX)(DI*1) MOVOU X13, -48(AX)(DI*1) MOVOU X14, -32(AX)(DI*1) MOVOU X15, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ DI, $0x00000100 LEAQ 256(R8), R8 LEAQ 256(AX), AX JGE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned: LEAQ (R8)(DI*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, DI MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (R8), Y4 ADDQ R11, R8 SUBQ R9, DI emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop: VMOVDQU (R8), Y0 VMOVDQU 32(R8), Y1 VMOVDQU 64(R8), Y2 VMOVDQU 96(R8), Y3 ADDQ R9, R8 VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, DI JA emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop ADDQ R9, DI ADDQ AX, DI VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(DI) MOVOU X6, -112(DI) MOVOU X7, 
-96(DI) MOVOU X8, -80(DI) MOVOU X9, -64(DI) MOVOU X10, -48(DI) MOVOU X11, -32(DI) MOVOU X12, -16(DI) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8BAvx: MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm8BAvx: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8BAvx matchlen_loop_repeat_extend: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm8BAvx matchlen_single_loopback_repeat_extend: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm8BAvx LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeSnappyBlockAsm8BAvx: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8BAvx four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8BAvx: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8BAvx MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8BAvx JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8BAvx four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8BAvx: TESTL BP, BP JZ repeat_end_emit_encodeSnappyBlockAsm8BAvx MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeSnappyBlockAsm8BAvx two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8BAvx: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8BAvx MOVB $0xee, (AX) MOVW SI, 
1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8BAvx two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8BAvx: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8BAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8BAvx MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm8BAvx emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8BAvx: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm8BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm8BAvx JMP search_loop_encodeSnappyBlockAsm8BAvx no_repeat_found_encodeSnappyBlockAsm8BAvx: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm8BAvx SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm8BAvx MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm8BAvx MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm8BAvx candidate3_match_encodeSnappyBlockAsm8BAvx: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm8BAvx candidate2_match_encodeSnappyBlockAsm8BAvx: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm8BAvx: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm8BAvx match_extend_back_loop_encodeSnappyBlockAsm8BAvx: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm8BAvx MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm8BAvx LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm8BAvx JMP match_extend_back_loop_encodeSnappyBlockAsm8BAvx match_extend_back_end_encodeSnappyBlockAsm8BAvx: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL 
match_dst_size_check_encodeSnappyBlockAsm8BAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm8BAvx: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8BAvx MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 MOVL R8, DI SUBL $0x01, DI JC emit_literal_done_match_emit_encodeSnappyBlockAsm8BAvx CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm8BAvx CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm8BAvx CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeSnappyBlockAsm8BAvx CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeSnappyBlockAsm8BAvx MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_match_emit_encodeSnappyBlockAsm8BAvx four_bytes_match_emit_encodeSnappyBlockAsm8BAvx: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_match_emit_encodeSnappyBlockAsm8BAvx three_bytes_match_emit_encodeSnappyBlockAsm8BAvx: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_match_emit_encodeSnappyBlockAsm8BAvx two_bytes_match_emit_encodeSnappyBlockAsm8BAvx: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX JMP memmove_match_emit_encodeSnappyBlockAsm8BAvx one_byte_match_emit_encodeSnappyBlockAsm8BAvx: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm8BAvx: LEAQ (AX)(R8*1), DI NOP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_tail: TESTQ R8, R8 JEQ memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_8 CMPQ R8, $0x10 JBE 
emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (AX) MOVB R10, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (AX) MOVB R10, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (AX) MOVL R10, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (AX) MOVQ R10, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_33through64: MOVOU (SI), X0 
MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(R8*1) MOVOU X9, -112(AX)(R8*1) MOVOU X10, -96(AX)(R8*1) MOVOU X11, -80(AX)(R8*1) MOVOU X12, -64(AX)(R8*1) MOVOU X13, -48(AX)(R8*1) MOVOU X14, -32(AX)(R8*1) MOVOU X15, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 
112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(AX), AX JGE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_tail emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ AX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ AX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI VMOVDQA Y0, (AX) VMOVDQA Y1, 32(AX) VMOVDQA Y2, 64(AX) VMOVDQA Y3, 96(AX) ADDQ R9, AX SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ AX, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) memmove_end_copy_match_emit_encodeSnappyBlockAsm8BAvx: MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm8BAvx: match_nolit_loop_encodeSnappyBlockAsm8BAvx: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm8BAvx matchlen_loopback_match_nolit_encodeSnappyBlockAsm8BAvx: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8BAvx BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm8BAvx 
matchlen_loop_match_nolit_encodeSnappyBlockAsm8BAvx: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8BAvx matchlen_single_match_nolit_encodeSnappyBlockAsm8BAvx: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm8BAvx matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8BAvx: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm8BAvx LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8BAvx match_nolit_end_encodeSnappyBlockAsm8BAvx: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeSnappyBlockAsm8BAvx four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8BAvx: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm8BAvx MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm8BAvx JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8BAvx four_bytes_remain_match_nolit_encodeSnappyBlockAsm8BAvx: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm8BAvx MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8BAvx two_byte_offset_match_nolit_encodeSnappyBlockAsm8BAvx: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8BAvx MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8BAvx two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8BAvx: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8BAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8BAvx MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8BAvx emit_copy_three_match_nolit_encodeSnappyBlockAsm8BAvx: MOVB $0x02, BL LEAL 
-4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm8BAvx: MOVL CX, 12(SP) CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm8BAvx CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm8BAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8BAvx: MOVQ -2(DX)(CX*1), SI MOVQ $0x9e3779b1, BP MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, R8 SHLQ $0x20, DI IMULQ BP, DI SHRQ $0x38, DI SHLQ $0x20, R8 IMULQ BP, R8 SHRQ $0x38, R8 LEAL -2(CX), R9 MOVL 24(SP)(R8*4), BP MOVL R9, 24(SP)(DI*4) MOVL CX, 24(SP)(R8*4) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm8BAvx INCL CX JMP search_loop_encodeSnappyBlockAsm8BAvx emit_remainder_encodeSnappyBlockAsm8BAvx: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm8BAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm8BAvx: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8BAvx MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP MOVL BP, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeSnappyBlockAsm8BAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm8BAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm8BAvx four_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm8BAvx three_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm8BAvx 
two_bytes_emit_remainder_encodeSnappyBlockAsm8BAvx: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX JMP memmove_emit_remainder_encodeSnappyBlockAsm8BAvx one_byte_emit_remainder_encodeSnappyBlockAsm8BAvx: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm8BAvx: LEAQ (AX)(BP*1), DX MOVL BP, BX NOP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_tail: TESTQ BX, BX JEQ memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), SI MOVB BP, (AX) MOVB SI, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_4: MOVL (CX), BP MOVL BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_3: MOVW (CX), BP MOVB 2(CX), SI 
MOVW BP, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_5through7: MOVL (CX), BP MOVL -4(CX)(BX*1), SI MOVL BP, (AX) MOVL SI, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_8: MOVQ (CX), BP MOVQ BP, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_9through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), SI MOVQ BP, (AX) MOVQ SI, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_65through128: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_129through256: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU -128(CX)(BX*1), X8 MOVOU -112(CX)(BX*1), X9 MOVOU -96(CX)(BX*1), X10 MOVOU 
-80(CX)(BX*1), X11 MOVOU -64(CX)(BX*1), X12 MOVOU -48(CX)(BX*1), X13 MOVOU -32(CX)(BX*1), X14 MOVOU -16(CX)(BX*1), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, -128(AX)(BX*1) MOVOU X9, -112(AX)(BX*1) MOVOU X10, -96(AX)(BX*1) MOVOU X11, -80(AX)(BX*1) MOVOU X12, -64(AX)(BX*1) MOVOU X13, -48(AX)(BX*1) MOVOU X14, -32(AX)(BX*1) MOVOU X15, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU 32(CX), X2 MOVOU 48(CX), X3 MOVOU 64(CX), X4 MOVOU 80(CX), X5 MOVOU 96(CX), X6 MOVOU 112(CX), X7 MOVOU 128(CX), X8 MOVOU 144(CX), X9 MOVOU 160(CX), X10 MOVOU 176(CX), X11 MOVOU 192(CX), X12 MOVOU 208(CX), X13 MOVOU 224(CX), X14 MOVOU 240(CX), X15 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 32(AX) MOVOU X3, 48(AX) MOVOU X4, 64(AX) MOVOU X5, 80(AX) MOVOU X6, 96(AX) MOVOU X7, 112(AX) MOVOU X8, 128(AX) MOVOU X9, 144(AX) MOVOU X10, 160(AX) MOVOU X11, 176(AX) MOVOU X12, 192(AX) MOVOU X13, 208(AX) MOVOU X14, 224(AX) MOVOU X15, 240(AX) CMPQ BX, $0x00000100 LEAQ 256(CX), CX LEAQ 256(AX), AX JGE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_avxUnaligned: LEAQ (CX)(BX*1), SI MOVQ AX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 MOVQ $0x00000080, BP ANDQ $0xffffffe0, AX ADDQ $0x20, AX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ AX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (CX), Y4 ADDQ DI, CX SUBQ BP, BX emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop: VMOVDQU (CX), Y0 VMOVDQU 32(CX), Y1 VMOVDQU 64(CX), Y2 VMOVDQU 96(CX), Y3 ADDQ BP, CX VMOVDQA Y0, (AX) VMOVDQA 
Y1, 32(AX) // operands of the VMOVDQA whose mnemonic ends the previous source line
	// Tail of the function that precedes emitLiteral. Its TEXT directive and
	// the setup of BP (0x80), BX, R8, Y4 and X5..X12 are on earlier source
	// lines.
	VMOVDQA Y2, 64(AX)
	VMOVDQA Y3, 96(AX)

	// Advance dst by BP and take BP off the remaining count in BX; loop again
	// while the subtraction leaves a non-zero result without borrow (JA).
	ADDQ    BP, AX
	SUBQ    BP, BX
	JA      emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8BAvx_memmove_gobble_128_loop

	// Re-add BP and the current dst pointer: BX turns from a count into the
	// address that the saved tail is stored relative to.
	ADDQ    BP, BX
	ADDQ    AX, BX

	// Write back the 32 bytes kept in Y4 at the address kept in R8, then the
	// 128 bytes kept in X5..X12 so that they end exactly at BX.
	VMOVDQU Y4, (R8)
	VZEROUPPER // zero the upper YMM halves before the SSE stores that follow
	MOVOU   X5, -128(BX)
	MOVOU   X6, -112(BX)
	MOVOU   X7, -96(BX)
	MOVOU   X8, -80(BX)
	MOVOU   X9, -64(BX)
	MOVOU   X10, -48(BX)
	MOVOU   X11, -32(BX)
	MOVOU   X12, -16(BX)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8BAvx:
	// DX was computed before the copy as dst pointer + literal length.
	MOVQ    DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm8BAvx:
	// Return value = current dst pointer - dst base.
	MOVQ    dst_base+0(FP), CX
	SUBQ    CX, AX
	MOVQ    AX, ret+48(FP)
	RET

// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// emitLiteral writes a length header for lit at the start of dst, copies the
// bytes of lit directly after it, and returns header size + len(lit).
// An empty lit writes nothing and returns 0.
//
// Header, selected from n = len(lit)-1 (held as a 32-bit value):
//   n < 60       1 byte : n<<2
//   n < 1<<8     2 bytes: 0xf0, n
//   n < 1<<16    3 bytes: 0xf4, n as 16-bit little-endian
//   n < 1<<24    4 bytes: 0xf8, low 24 bits of n, little-endian
//   otherwise    5 bytes: 0xfc, n as 32-bit little-endian
//
// Registers:
//   AX  dst write pointer
//   CX  lit read pointer (also reused as scratch by some copy cases)
//   DX  len(lit): the byte count used by the copy
//   BX  return value: low 32 bits of len(lit), plus the header size
//   BP  n for the header, later scratch
//   SI  scratch; X0..X15 are copy buffers
//
// The length and capacity of dst are never read, so nothing here checks that
// dst has room for the output.
// NOTE(review): the header selection uses signed jumps (JLT) on the 32-bit n;
// presumably callers keep len(lit) far below 1<<31 - confirm.
// NOTE(review): BP is overwritten although the frame size is $0, so there is
// no frame in which it could be saved - confirm this is acceptable for
// frame-pointer based unwinding/profiling.
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ    dst_base+0(FP), AX
	MOVQ    lit_base+24(FP), CX
	MOVQ    lit_len+32(FP), DX
	MOVL    DX, BX
	MOVL    DX, BP

	// n = len(lit) - 1; the borrow (carry) means len(lit) was 0.
	SUBL    $0x01, BP
	JC      emit_literal_end_standalone
	CMPL    BP, $0x3c
	JLT     one_byte_standalone
	CMPL    BP, $0x00000100
	JLT     two_bytes_standalone
	CMPL    BP, $0x00010000
	JLT     three_bytes_standalone
	CMPL    BP, $0x01000000
	JLT     four_bytes_standalone

	// 5-byte header: 0xfc followed by n as 32 bits.
	MOVB    $0xfc, (AX)
	MOVL    BP, 1(AX)
	ADDQ    $0x05, BX
	ADDQ    $0x05, AX
	JMP     memmove_standalone

four_bytes_standalone:
	// 4-byte header: 0xf8, then bits 0..15 of n (MOVW) and bits 16..23 (SI).
	MOVL    BP, SI
	SHRL    $0x10, SI
	MOVB    $0xf8, (AX)
	MOVW    BP, 1(AX)
	MOVB    SI, 3(AX)
	ADDQ    $0x04, BX
	ADDQ    $0x04, AX
	JMP     memmove_standalone

three_bytes_standalone:
	// 3-byte header: 0xf4 followed by n as 16 bits.
	MOVB    $0xf4, (AX)
	MOVW    BP, 1(AX)
	ADDQ    $0x03, BX
	ADDQ    $0x03, AX
	JMP     memmove_standalone

two_bytes_standalone:
	// 2-byte header: 0xf0 followed by n as one byte.
	MOVB    $0xf0, (AX)
	MOVB    BP, 1(AX)
	ADDQ    $0x02, BX
	ADDQ    $0x02, AX
	JMP     memmove_standalone

one_byte_standalone:
	// 1-byte header: n<<2.
	SHLB    $0x02, BP
	MOVB    BP, (AX)
	ADDQ    $0x01, BX
	ADDQ    $0x01, AX

memmove_standalone:
	// All header cases join here; AX now points just past the header.
	NOP

emit_lit_memmove_standalone_memmove_tail:
	// Size dispatch for copying DX bytes from (CX) to (AX). Also re-entered
	// by the 256-byte chunk loop to finish whatever is left.
	TESTQ   DX, DX
	JEQ     emit_literal_end_standalone
	CMPQ    DX, $0x02
	JBE     emit_lit_memmove_standalone_memmove_move_1or2

	// One compare feeds two jumps: below 4 can only be 3 here, equal is 4.
	CMPQ    DX, $0x04
	JB      emit_lit_memmove_standalone_memmove_move_3
	JBE     emit_lit_memmove_standalone_memmove_move_4

	// Same pattern: below 8 is 5..7, equal is 8.
	CMPQ    DX, $0x08
	JB      emit_lit_memmove_standalone_memmove_move_5through7
	JE      emit_lit_memmove_standalone_memmove_move_8
	CMPQ    DX, $0x10
	JBE     emit_lit_memmove_standalone_memmove_move_9through16
	CMPQ    DX, $0x20
	JBE     emit_lit_memmove_standalone_memmove_move_17through32
	CMPQ    DX, $0x40
	JBE     emit_lit_memmove_standalone_memmove_move_33through64
	CMPQ    DX, $0x80
	JBE     emit_lit_memmove_standalone_memmove_move_65through128
	CMPQ    DX, $0x00000100
	JBE     emit_lit_memmove_standalone_memmove_move_129through256
	JMP     emit_lit_memmove_standalone_memmove_move_256through2048

	// Each fixed-range case below loads from the front of the source and from
	// the end of the source (end-relative via DX), then stores both pieces;
	// the two pieces overlap when the count is below the case's maximum.
	// All loads are issued before the first store.

emit_lit_memmove_standalone_memmove_move_1or2:
	// First byte and last byte (the same byte when DX == 1).
	// The second load overwrites CL, i.e. the low byte of the source pointer;
	// the pointer is not used again on this path.
	MOVB    (CX), BP
	MOVB    -1(CX)(DX*1), CL
	MOVB    BP, (AX)
	MOVB    CL, -1(AX)(DX*1)
	JMP     emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_4:
	MOVL    (CX), BP
	MOVL    BP, (AX)
	JMP     emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_3:
	// A 16-bit move plus the third byte.
	MOVW    (CX), BP
	MOVB    2(CX), CL
	MOVW    BP, (AX)
	MOVB    CL, 2(AX)
	JMP     emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_5through7:
	// First 4 bytes and last 4 bytes; CX is reused for the second piece.
	MOVL    (CX), BP
	MOVL    -4(CX)(DX*1), CX
	MOVL    BP, (AX)
	MOVL    CX, -4(AX)(DX*1)
	JMP     emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_8:
	MOVQ    (CX), BP
	MOVQ    BP, (AX)
	JMP     emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_9through16:
	// First 8 bytes and last 8 bytes; CX is reused for the second piece.
	MOVQ    (CX), BP
	MOVQ    -8(CX)(DX*1), CX
	MOVQ    BP, (AX)
	MOVQ    CX, -8(AX)(DX*1)
	JMP     emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	// First 16 bytes and last 16 bytes.
	MOVOU   (CX), X0
	MOVOU   -16(CX)(DX*1), X1
	MOVOU   X0, (AX)
	MOVOU   X1, -16(AX)(DX*1)
	JMP     emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	// First 32 bytes and last 32 bytes.
	MOVOU   (CX), X0
	MOVOU   16(CX), X1
	MOVOU   -32(CX)(DX*1), X2
	MOVOU   -16(CX)(DX*1), X3
	MOVOU   X0, (AX)
	MOVOU   X1, 16(AX)
	MOVOU   X2, -32(AX)(DX*1)
	MOVOU   X3, -16(AX)(DX*1)
	JMP     emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_65through128:
	// First 64 bytes and last 64 bytes.
	MOVOU   (CX), X0
	MOVOU   16(CX), X1
	MOVOU   32(CX), X2
	MOVOU   48(CX), X3
	MOVOU   -64(CX)(DX*1), X12
	MOVOU   -48(CX)(DX*1), X13
	MOVOU   -32(CX)(DX*1), X14
	MOVOU   -16(CX)(DX*1), X15
	MOVOU   X0, (AX)
	MOVOU   X1, 16(AX)
	MOVOU   X2, 32(AX)
	MOVOU   X3, 48(AX)
	MOVOU   X12, -64(AX)(DX*1)
	MOVOU   X13, -48(AX)(DX*1)
	MOVOU   X14, -32(AX)(DX*1)
	MOVOU   X15, -16(AX)(DX*1)
	JMP     emit_literal_end_standalone
// emitLiteral, continued: the copy cases for more than 128 bytes and the
// function's return. Here AX is the dst write pointer, CX the source pointer,
// DX the byte count and BX the return value.

emit_lit_memmove_standalone_memmove_move_129through256:
	// First 128 bytes and last 128 bytes (end-relative via DX); the two
	// halves overlap when the count is below 256. All loads precede the
	// first store.
	MOVOU   (CX), X0
	MOVOU   16(CX), X1
	MOVOU   32(CX), X2
	MOVOU   48(CX), X3
	MOVOU   64(CX), X4
	MOVOU   80(CX), X5
	MOVOU   96(CX), X6
	MOVOU   112(CX), X7
	MOVOU   -128(CX)(DX*1), X8
	MOVOU   -112(CX)(DX*1), X9
	MOVOU   -96(CX)(DX*1), X10
	MOVOU   -80(CX)(DX*1), X11
	MOVOU   -64(CX)(DX*1), X12
	MOVOU   -48(CX)(DX*1), X13
	MOVOU   -32(CX)(DX*1), X14
	MOVOU   -16(CX)(DX*1), X15
	MOVOU   X0, (AX)
	MOVOU   X1, 16(AX)
	MOVOU   X2, 32(AX)
	MOVOU   X3, 48(AX)
	MOVOU   X4, 64(AX)
	MOVOU   X5, 80(AX)
	MOVOU   X6, 96(AX)
	MOVOU   X7, 112(AX)
	MOVOU   X8, -128(AX)(DX*1)
	MOVOU   X9, -112(AX)(DX*1)
	MOVOU   X10, -96(AX)(DX*1)
	MOVOU   X11, -80(AX)(DX*1)
	MOVOU   X12, -64(AX)(DX*1)
	MOVOU   X13, -48(AX)(DX*1)
	MOVOU   X14, -32(AX)(DX*1)
	MOVOU   X15, -16(AX)(DX*1)
	JMP     emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_256through2048:
	// Chunk loop for counts above 256: move 256 bytes per pass, then hand
	// whatever is left (< 256) back to the size dispatch at ..._memmove_tail.
	LEAQ    -256(DX), DX
	MOVOU   (CX), X0
	MOVOU   16(CX), X1
	MOVOU   32(CX), X2
	MOVOU   48(CX), X3
	MOVOU   64(CX), X4
	MOVOU   80(CX), X5
	MOVOU   96(CX), X6
	MOVOU   112(CX), X7
	MOVOU   128(CX), X8
	MOVOU   144(CX), X9
	MOVOU   160(CX), X10
	MOVOU   176(CX), X11
	MOVOU   192(CX), X12
	MOVOU   208(CX), X13
	MOVOU   224(CX), X14
	MOVOU   240(CX), X15
	MOVOU   X0, (AX)
	MOVOU   X1, 16(AX)
	MOVOU   X2, 32(AX)
	MOVOU   X3, 48(AX)
	MOVOU   X4, 64(AX)
	MOVOU   X5, 80(AX)
	MOVOU   X6, 96(AX)
	MOVOU   X7, 112(AX)
	MOVOU   X8, 128(AX)
	MOVOU   X9, 144(AX)
	MOVOU   X10, 160(AX)
	MOVOU   X11, 176(AX)
	MOVOU   X12, 192(AX)
	MOVOU   X13, 208(AX)
	MOVOU   X14, 224(AX)
	MOVOU   X15, 240(AX)

	// The compare sets the flags for the JGE below; the two LEAQ pointer
	// bumps in between do not modify flags.
	CMPQ    DX, $0x00000100
	LEAQ    256(CX), CX
	LEAQ    256(AX), AX
	JGE     emit_lit_memmove_standalone_memmove_move_256through2048
	JMP     emit_lit_memmove_standalone_memmove_tail

emit_literal_end_standalone:
	// BX = header size + len(lit), or 0 for an empty lit.
	MOVQ    BX, ret+48(FP)
	RET

// func emitLiteralAvx(dst []byte, lit []byte) int
// Requires: AVX, SSE2
//
// emitLiteralAvx writes a length header for lit at the start of dst, copies
// the bytes of lit directly after it, and returns header size + len(lit).
// An empty lit writes nothing and returns 0. Copies of more than 256 bytes
// are routed to the ..._memmove_avxUnaligned path.
//
// Header, selected from n = len(lit)-1 (held as a 32-bit value):
//   n < 60       1 byte : n<<2
//   n < 1<<8     2 bytes: 0xf0, n
//   n < 1<<16    3 bytes: 0xf4, n as 16-bit little-endian
//   n < 1<<24    4 bytes: 0xf8, low 24 bits of n, little-endian
//   otherwise    5 bytes: 0xfc, n as 32-bit little-endian
//
// Registers:
//   AX  dst write pointer
//   CX  lit read pointer
//   DX  len(lit): the byte count used by the copy
//   BX  return value: low 32 bits of len(lit), plus the header size
//   BP  n for the header, later scratch
//   SI  scratch; X0..X15 are copy buffers
//
// The label names repeat those used in emitLiteral; labels are local to
// their TEXT block. The length and capacity of dst are never read.
// NOTE(review): the header selection uses signed jumps (JLT) on the 32-bit n;
// presumably callers keep len(lit) far below 1<<31 - confirm.
// NOTE(review): BP is overwritten although the frame size is $0, so there is
// no frame in which it could be saved - confirm this is acceptable for
// frame-pointer based unwinding/profiling.
TEXT ·emitLiteralAvx(SB), NOSPLIT, $0-56
	MOVQ    dst_base+0(FP), AX
	MOVQ    lit_base+24(FP), CX
	MOVQ    lit_len+32(FP), DX
	MOVL    DX, BX
	MOVL    DX, BP

	// n = len(lit) - 1; the borrow (carry) means len(lit) was 0.
	SUBL    $0x01, BP
	JC      emit_literal_end_avx_standalone
	CMPL    BP, $0x3c
	JLT     one_byte_standalone
	CMPL    BP, $0x00000100
	JLT     two_bytes_standalone
	CMPL    BP, $0x00010000
	JLT     three_bytes_standalone
	CMPL    BP, $0x01000000
	JLT     four_bytes_standalone

	// 5-byte header: 0xfc followed by n as 32 bits.
	MOVB    $0xfc, (AX)
	MOVL    BP, 1(AX)
	ADDQ    $0x05, BX
	ADDQ    $0x05, AX
	JMP     memmove_standalone

four_bytes_standalone:
	// 4-byte header: 0xf8, then bits 0..15 of n (MOVW) and bits 16..23 (SI).
	MOVL    BP, SI
	SHRL    $0x10, SI
	MOVB    $0xf8, (AX)
	MOVW    BP, 1(AX)
	MOVB    SI, 3(AX)
	ADDQ    $0x04, BX
	ADDQ    $0x04, AX
	JMP     memmove_standalone

three_bytes_standalone:
	// 3-byte header: 0xf4 followed by n as 16 bits.
	MOVB    $0xf4, (AX)
	MOVW    BP, 1(AX)
	ADDQ    $0x03, BX
	ADDQ    $0x03, AX
	JMP     memmove_standalone

two_bytes_standalone:
	// 2-byte header: 0xf0 followed by n as one byte.
	MOVB    $0xf0, (AX)
	MOVB    BP, 1(AX)
	ADDQ    $0x02, BX
	ADDQ    $0x02, AX
	JMP     memmove_standalone

one_byte_standalone:
	// 1-byte header: n<<2.
	SHLB    $0x02, BP
	MOVB    BP, (AX)
	ADDQ    $0x01, BX
	ADDQ    $0x01, AX

memmove_standalone:
	// All header cases join here; AX now points just past the header.
	NOP

emit_lit_memmove_standalone_memmove_tail:
	// Size dispatch for copying DX bytes from (CX) to (AX).
	TESTQ   DX, DX
	JEQ     emit_literal_end_avx_standalone
	CMPQ    DX, $0x02
	JBE     emit_lit_memmove_standalone_memmove_move_1or2

	// One compare feeds two jumps: below 4 can only be 3 here, equal is 4.
	CMPQ    DX, $0x04
	JB      emit_lit_memmove_standalone_memmove_move_3
	JBE     emit_lit_memmove_standalone_memmove_move_4

	// Same pattern: below 8 is 5..7, equal is 8.
	CMPQ    DX, $0x08
	JB      emit_lit_memmove_standalone_memmove_move_5through7
	JE      emit_lit_memmove_standalone_memmove_move_8
	CMPQ    DX, $0x10
	JBE     emit_lit_memmove_standalone_memmove_move_9through16
	CMPQ    DX, $0x20
	JBE     emit_lit_memmove_standalone_memmove_move_17through32
	CMPQ    DX, $0x40
	JBE     emit_lit_memmove_standalone_memmove_move_33through64
	CMPQ    DX, $0x80
	JBE     emit_lit_memmove_standalone_memmove_move_65through128
	CMPQ    DX, $0x00000100
	JBE     emit_lit_memmove_standalone_memmove_move_129through256

	// Anything above 256 bytes takes the AVX path.
	JMP     emit_lit_memmove_standalone_memmove_avxUnaligned

	// Each fixed-range case below loads from the front of the source and from
	// the end of the source (end-relative via DX), then stores both pieces;
	// the two pieces overlap when the count is below the case's maximum.
	// All loads are issued before the first store.

emit_lit_memmove_standalone_memmove_move_1or2:
	// First byte and last byte (the same byte when DX == 1).
	MOVB    (CX), BP
	MOVB    -1(CX)(DX*1), SI
	MOVB    BP, (AX)
	MOVB    SI, -1(AX)(DX*1)
	JMP     emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_4:
	MOVL    (CX), BP
	MOVL    BP, (AX)
	JMP     emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_3:
	// A 16-bit move plus the third byte.
	MOVW    (CX), BP
	MOVB    2(CX), SI
	MOVW    BP, (AX)
	MOVB    SI, 2(AX)
	JMP     emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_5through7:
	// First 4 bytes and last 4 bytes.
	MOVL    (CX), BP
	MOVL    -4(CX)(DX*1), SI
	MOVL    BP, (AX)
	MOVL    SI, -4(AX)(DX*1)
	JMP     emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_8:
	MOVQ    (CX), BP
	MOVQ    BP, (AX)
	JMP     emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_9through16:
	// First 8 bytes and last 8 bytes.
	MOVQ    (CX), BP
	MOVQ    -8(CX)(DX*1), SI
	MOVQ    BP, (AX)
	MOVQ    SI, -8(AX)(DX*1)
	JMP     emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	// First 16 bytes and last 16 bytes.
	MOVOU   (CX), X0
	MOVOU   -16(CX)(DX*1), X1
	MOVOU   X0, (AX)
	MOVOU   X1, -16(AX)(DX*1)
	JMP     emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	// First 32 bytes and last 32 bytes.
	MOVOU   (CX), X0
	MOVOU   16(CX), X1
	MOVOU   -32(CX)(DX*1), X2
	MOVOU   -16(CX)(DX*1), X3
	MOVOU   X0, (AX)
	MOVOU   X1, 16(AX)
	MOVOU   X2, -32(AX)(DX*1)
	MOVOU   X3, -16(AX)(DX*1)
	JMP     emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_65through128:
	// First 64 bytes and last 64 bytes.
	MOVOU   (CX), X0
	MOVOU   16(CX), X1
	MOVOU   32(CX), X2
	MOVOU   48(CX), X3
	MOVOU   -64(CX)(DX*1), X12
	MOVOU   -48(CX)(DX*1), X13
	MOVOU   -32(CX)(DX*1), X14
	MOVOU   -16(CX)(DX*1), X15
	MOVOU   X0, (AX)
	MOVOU   X1, 16(AX)
	MOVOU   X2, 32(AX)
	MOVOU   X3, 48(AX)
	MOVOU   X12, -64(AX)(DX*1)
	MOVOU   X13, -48(AX)(DX*1)
	MOVOU   X14, -32(AX)(DX*1)
	MOVOU   X15, -16(AX)(DX*1)
	JMP     emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_129through256:
	// First 128 bytes and last 128 bytes.
	MOVOU   (CX), X0
	MOVOU   16(CX), X1
	MOVOU   32(CX), X2
	MOVOU   48(CX), X3
	MOVOU   64(CX), X4
	MOVOU   80(CX), X5
	MOVOU   96(CX), X6
	MOVOU   112(CX), X7
	MOVOU   -128(CX)(DX*1), X8
	MOVOU   -112(CX)(DX*1), X9
	MOVOU   -96(CX)(DX*1), X10
	MOVOU   -80(CX)(DX*1), X11
	MOVOU   -64(CX)(DX*1), X12
	MOVOU   -48(CX)(DX*1), X13
	MOVOU   -32(CX)(DX*1), X14
	MOVOU   -16(CX)(DX*1), X15
	MOVOU   X0, (AX)
	MOVOU   X1, 16(AX)
	MOVOU   X2, 32(AX)
	MOVOU   X3, 48(AX)
	MOVOU   X4, 64(AX)
	MOVOU   X5, 80(AX)
	MOVOU   X6, 96(AX)
	MOVOU   X7, 112(AX)
	MOVOU   X8, -128(AX)(DX*1)
	MOVOU   X9, -112(AX)(DX*1)
	MOVOU   X10, -96(AX)(DX*1)
	MOVOU   X11, -80(AX)(DX*1)
	MOVOU   X12, -64(AX)(DX*1)
	MOVOU   X13, -48(AX)(DX*1)
	MOVOU   X14, -32(AX)(DX*1)
	MOVOU   X15, -16(AX)(DX*1)
	JMP     emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_256through2048:
	// 256-byte SSE chunk loop. Within emitLiteralAvx the only jump to this
	// label is the JGE at the end of the loop itself (the size dispatch uses
	// ..._memmove_avxUnaligned instead), so it appears to be unreachable here.
	// The remaining instructions of the loop are on the next source line.
	LEAQ    -256(DX), DX
	MOVOU   (CX), X0
	MOVOU   16(CX), X1
	MOVOU   32(CX), X2
// emitLiteralAvx, continued: the rest of its 256-byte SSE chunk loop (the
// loop label and first loads are on the previous source line), the AVX copy
// path and the function's return. AX = dst write pointer, CX = source
// pointer, DX = byte count, BX = return value.
	MOVOU   48(CX), X3
	MOVOU   64(CX), X4
	MOVOU   80(CX), X5
	MOVOU   96(CX), X6
	MOVOU   112(CX), X7
	MOVOU   128(CX), X8
	MOVOU   144(CX), X9
	MOVOU   160(CX), X10
	MOVOU   176(CX), X11
	MOVOU   192(CX), X12
	MOVOU   208(CX), X13
	MOVOU   224(CX), X14
	MOVOU   240(CX), X15
	MOVOU   X0, (AX)
	MOVOU   X1, 16(AX)
	MOVOU   X2, 32(AX)
	MOVOU   X3, 48(AX)
	MOVOU   X4, 64(AX)
	MOVOU   X5, 80(AX)
	MOVOU   X6, 96(AX)
	MOVOU   X7, 112(AX)
	MOVOU   X8, 128(AX)
	MOVOU   X9, 144(AX)
	MOVOU   X10, 160(AX)
	MOVOU   X11, 176(AX)
	MOVOU   X12, 192(AX)
	MOVOU   X13, 208(AX)
	MOVOU   X14, 224(AX)
	MOVOU   X15, 240(AX)

	// The compare sets the flags for the JGE below; the two LEAQ pointer
	// bumps in between do not modify flags.
	CMPQ    DX, $0x00000100
	LEAQ    256(CX), CX
	LEAQ    256(AX), AX
	JGE     emit_lit_memmove_standalone_memmove_move_256through2048
	JMP     emit_lit_memmove_standalone_memmove_tail

emit_lit_memmove_standalone_memmove_avxUnaligned:
	// Copy path for counts above 256 bytes. Plan:
	//   1. keep the last 128 source bytes in X5..X12,
	//   2. keep the first 32 source bytes in Y4,
	//   3. copy the middle 128 bytes per pass with stores to a 32-byte
	//      aligned destination,
	//   4. store the kept head and tail at the unaligned start and end.
	// The tail loads are interleaved with the pointer arithmetic.

	// SI = one past the last source byte, R8 = the original dst pointer.
	LEAQ    (CX)(DX*1), SI
	MOVQ    AX, R8
	MOVOU   -128(SI), X5
	MOVOU   -112(SI), X6

	// BP = 128, the number of bytes moved per loop pass.
	MOVQ    $0x00000080, BP

	// Round AX up to the next 32-byte boundary above the original dst.
	// NOTE(review): the mask is written as $0xffffffe0. It preserves the
	// upper 32 pointer bits only if it is encoded as a sign-extended 32-bit
	// immediate (i.e. -32) - confirm against the assembled output.
	ANDQ    $0xffffffe0, AX
	ADDQ    $0x20, AX
	MOVOU   -96(SI), X7
	MOVOU   -80(SI), X8

	// DI = aligned dst - original dst: the bytes covered by the head store.
	MOVQ    AX, DI
	SUBQ    R8, DI
	MOVOU   -64(SI), X9
	MOVOU   -48(SI), X10

	// The loop does not have to copy those first DI bytes.
	SUBQ    DI, DX
	MOVOU   -32(SI), X11
	MOVOU   -16(SI), X12

	// Keep the first 32 source bytes, then skip the source ahead by DI too.
	VMOVDQU (CX), Y4
	ADDQ    DI, CX

	// Bias the count by one chunk so the loop can be closed with SUBQ/JA.
	SUBQ    BP, DX

emit_lit_memmove_standalone_memmove_gobble_128_loop:
	// 128 bytes per pass: unaligned loads (VMOVDQU), aligned stores (VMOVDQA).
	VMOVDQU (CX), Y0
	VMOVDQU 32(CX), Y1
	VMOVDQU 64(CX), Y2
	VMOVDQU 96(CX), Y3
	ADDQ    BP, CX
	VMOVDQA Y0, (AX)
	VMOVDQA Y1, 32(AX)
	VMOVDQA Y2, 64(AX)
	VMOVDQA Y3, 96(AX)
	ADDQ    BP, AX
	SUBQ    BP, DX
	JA      emit_lit_memmove_standalone_memmove_gobble_128_loop

	// Undo the bias and add the current dst pointer: DX becomes the address
	// one past the last destination byte.
	ADDQ    BP, DX
	ADDQ    AX, DX

	// Head goes to the original (unaligned) dst, tail ends exactly at DX.
	VMOVDQU Y4, (R8)
	VZEROUPPER // zero the upper YMM halves before the SSE stores that follow
	MOVOU   X5, -128(DX)
	MOVOU   X6, -112(DX)
	MOVOU   X7, -96(DX)
	MOVOU   X8, -80(DX)
	MOVOU   X9, -64(DX)
	MOVOU   X10, -48(DX)
	MOVOU   X11, -32(DX)
	MOVOU   X12, -16(DX)

emit_literal_end_avx_standalone:
	// BX = header size + len(lit), or 0 for an empty lit.
	MOVQ    BX, ret+48(FP)
	RET

// func emitRepeat(dst []byte, offset int, length int) int
//
// emitRepeat writes one or more encoded elements for a repeat of the given
// length at the start of dst and returns the number of bytes written.
//
// With l = length-4 (32-bit arithmetic, signed comparisons) the form is:
//   length <= 8                    2 bytes: (l<<2)|1 stored as a 16-bit word
//                                           (second byte is 0 for 0 <= l <= 4)
//   length < 12 and offset < 2048  2 bytes: low byte of 1 + l*4 | (offset>>8)<<5,
//                                           then the low byte of offset
//   l < 0x104                      3 bytes: 0x15, 0x00, low byte of l-4
//   l < 0x10100                    4 bytes: 0x19, 0x00, l-256 as 16-bit LE
//   l < 0x100ffff                  5 bytes: 0x1d, 0x00, low 24 bits of l-65536, LE
//   otherwise                      5 bytes: 0x1d, 0x00, 0xfb, 0xff, 0xff; then
//                                           l-16842747 is fed back in as the
//                                           new length and selection restarts
//
// Registers:
//   AX  dst write pointer
//   BX  bytes written so far (the return value)
//   CX  offset (reused as scratch in the 5-byte and offset forms)
//   DX  length, then l
//   BP  scratch
//
// The length and capacity of dst are never read.
// NOTE(review): BP is overwritten although the frame size is $0, so there is
// no frame in which it could be saved - confirm this is acceptable for
// frame-pointer based unwinding/profiling.
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	XORQ    BX, BX
	MOVQ    dst_base+0(FP), AX
	MOVQ    offset+24(FP), CX
	MOVQ    length+32(FP), DX

emit_repeat_again_standalone:
	// BP = length as passed in (or as left over), DX = l = length - 4.
	MOVL    DX, BP
	LEAL    -4(DX), DX
	CMPL    BP, $0x08
	JLE     repeat_two_standalone
	CMPL    BP, $0x0c
	JGE     cant_repeat_two_offset_standalone
	CMPL    CX, $0x00000800
	JLT     repeat_two_offset_standalone

cant_repeat_two_offset_standalone:
	CMPL    DX, $0x00000104
	JLT     repeat_three_standalone
	CMPL    DX, $0x00010100
	JLT     repeat_four_standalone
	CMPL    DX, $0x0100ffff
	JLT     repeat_five_standalone

	// Too long for a single element: emit the fixed bytes 1d 00 fb ff ff,
	// take 16842747 (0x0100fffb) off l and go round again with the rest.
	LEAL    -16842747(DX), DX
	MOVW    $0x001d, (AX)
	MOVW    $0xfffb, 2(AX)
	MOVB    $0xff, 4(AX)
	ADDQ    $0x05, AX
	ADDQ    $0x05, BX
	JMP     emit_repeat_again_standalone

repeat_five_standalone:
	// 1d 00, then l-65536 as 24 bits: MOVW stores bits 0..15, CL bits 16..23.
	LEAL    -65536(DX), DX
	MOVL    DX, CX
	MOVW    $0x001d, (AX)
	MOVW    DX, 2(AX)
	SARL    $0x10, CX
	MOVB    CL, 4(AX)
	ADDQ    $0x05, BX
	ADDQ    $0x05, AX
	JMP     gen_emit_repeat_end

repeat_four_standalone:
	// 19 00, then l-256 as 16 bits.
	LEAL    -256(DX), DX
	MOVW    $0x0019, (AX)
	MOVW    DX, 2(AX)
	ADDQ    $0x04, BX
	ADDQ    $0x04, AX
	JMP     gen_emit_repeat_end

repeat_three_standalone:
	// 15 00, then the low byte of l-4.
	LEAL    -4(DX), DX
	MOVW    $0x0015, (AX)
	MOVB    DL, 2(AX)
	ADDQ    $0x03, BX
	ADDQ    $0x03, AX
	JMP     gen_emit_repeat_end

repeat_two_standalone:
	// (l<<2)|1 written as a 16-bit word.
	SHLL    $0x02, DX
	ORL     $0x01, DX
	MOVW    DX, (AX)
	ADDQ    $0x02, BX
	ADDQ    $0x02, AX
	JMP     gen_emit_repeat_end

repeat_two_offset_standalone:
	// DX = 1 + l*4 (BP is zeroed only to serve as the LEAL base), second
	// byte = low byte of offset, then (offset>>8)<<5 is OR-ed into DX and
	// its low byte becomes the first byte.
	XORQ    BP, BP
	LEAL    1(BP)(DX*4), DX
	MOVB    CL, 1(AX)
	SARL    $0x08, CX
	SHLL    $0x05, CX
	ORL     CX, DX
	MOVB    DL, (AX)
	ADDQ    $0x02, BX
	ADDQ    $0x02, AX

gen_emit_repeat_end:
	MOVQ    BX, ret+40(FP)
	RET

// func emitCopy(dst []byte, offset int, length int) int
//
// Only the beginning of emitCopy is on this source line; the function
// continues on the following lines, so the comments below describe the
// visible instructions only. AX = dst write pointer, BX = bytes written so
// far, CX = offset, DX = length.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ    BX, BX
	MOVQ    dst_base+0(FP), AX
	MOVQ    offset+24(FP), CX
	MOVQ    length+32(FP), DX

	// Offsets below 65536 (signed 32-bit compare) go to the two-byte-offset code.
	CMPL    CX, $0x00010000
	JL      two_byte_offset_standalone

four_bytes_loop_back_standalone:
	// length <= 64 is finished off at four_bytes_remain_standalone.
	CMPL    DX, $0x40
	JLE     four_bytes_remain_standalone

	// Otherwise write byte 0xff followed by the 32-bit offset (5 bytes) and
	// take 64 off the length.
	MOVB    $0xff, (AX)
	MOVL    CX, 1(AX)
	LEAL    -64(DX), DX
	ADDQ    $0x05, BX
	ADDQ    $0x05, AX
	CMPL    DX, $0x04
	JL      four_bytes_remain_standalone

emit_repeat_again_standalone_emit_copy:
	// What follows is the same selection sequence as in emitRepeat, using
	// labels carrying an _emit_copy suffix: BP = remaining length,
	// DX = remaining length - 4.
	MOVL    DX, BP
	LEAL    -4(DX), DX
	CMPL    BP, $0x08
	JLE     repeat_two_standalone_emit_copy
	CMPL    BP, $0x0c
	JGE     cant_repeat_two_offset_standalone_emit_copy
	CMPL    CX, $0x00000800
	JLT     repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL    DX, $0x00000104
	JLT     repeat_three_standalone_emit_copy
	CMPL    DX, $0x00010100
	JLT     repeat_four_standalone_emit_copy
	CMPL    DX, $0x0100ffff
	JLT     repeat_five_standalone_emit_copy

	// The operands of this LEAL are on the next source line.
	LEAL
-16842747(DX), DX MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX ADDQ $0x05, BX JMP emit_repeat_again_standalone_emit_copy repeat_five_standalone_emit_copy: LEAL -65536(DX), DX MOVL DX, CX MOVW $0x001d, (AX) MOVW DX, 2(AX) SARL $0x10, CX MOVB CL, 4(AX) ADDQ $0x05, BX ADDQ $0x05, AX JMP gen_emit_copy_end repeat_four_standalone_emit_copy: LEAL -256(DX), DX MOVW $0x0019, (AX) MOVW DX, 2(AX) ADDQ $0x04, BX ADDQ $0x04, AX JMP gen_emit_copy_end repeat_three_standalone_emit_copy: LEAL -4(DX), DX MOVW $0x0015, (AX) MOVB DL, 2(AX) ADDQ $0x03, BX ADDQ $0x03, AX JMP gen_emit_copy_end repeat_two_standalone_emit_copy: SHLL $0x02, DX ORL $0x01, DX MOVW DX, (AX) ADDQ $0x02, BX ADDQ $0x02, AX JMP gen_emit_copy_end repeat_two_offset_standalone_emit_copy: XORQ BP, BP LEAL 1(BP)(DX*4), DX MOVB CL, 1(AX) SARL $0x08, CX SHLL $0x05, CX ORL CX, DX MOVB DL, (AX) ADDQ $0x02, BX ADDQ $0x02, AX JMP gen_emit_copy_end JMP four_bytes_loop_back_standalone four_bytes_remain_standalone: TESTL DX, DX JZ gen_emit_copy_end MOVB $0x03, BP LEAL -4(BP)(DX*4), DX MOVB DL, (AX) MOVL CX, 1(AX) ADDQ $0x05, BX ADDQ $0x05, AX JMP gen_emit_copy_end two_byte_offset_standalone: CMPL DX, $0x40 JLE two_byte_offset_short_standalone MOVB $0xee, (AX) MOVW CX, 1(AX) LEAL -60(DX), DX ADDQ $0x03, AX ADDQ $0x03, BX emit_repeat_again_standalone_emit_copy_short: MOVL DX, BP LEAL -4(DX), DX CMPL BP, $0x08 JLE repeat_two_standalone_emit_copy_short CMPL BP, $0x0c JGE cant_repeat_two_offset_standalone_emit_copy_short CMPL CX, $0x00000800 JLT repeat_two_offset_standalone_emit_copy_short cant_repeat_two_offset_standalone_emit_copy_short: CMPL DX, $0x00000104 JLT repeat_three_standalone_emit_copy_short CMPL DX, $0x00010100 JLT repeat_four_standalone_emit_copy_short CMPL DX, $0x0100ffff JLT repeat_five_standalone_emit_copy_short LEAL -16842747(DX), DX MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX ADDQ $0x05, BX JMP emit_repeat_again_standalone_emit_copy_short 
repeat_five_standalone_emit_copy_short: LEAL -65536(DX), DX MOVL DX, CX MOVW $0x001d, (AX) MOVW DX, 2(AX) SARL $0x10, CX MOVB CL, 4(AX) ADDQ $0x05, BX ADDQ $0x05, AX JMP gen_emit_copy_end repeat_four_standalone_emit_copy_short: LEAL -256(DX), DX MOVW $0x0019, (AX) MOVW DX, 2(AX) ADDQ $0x04, BX ADDQ $0x04, AX JMP gen_emit_copy_end repeat_three_standalone_emit_copy_short: LEAL -4(DX), DX MOVW $0x0015, (AX) MOVB DL, 2(AX) ADDQ $0x03, BX ADDQ $0x03, AX JMP gen_emit_copy_end repeat_two_standalone_emit_copy_short: SHLL $0x02, DX ORL $0x01, DX MOVW DX, (AX) ADDQ $0x02, BX ADDQ $0x02, AX JMP gen_emit_copy_end repeat_two_offset_standalone_emit_copy_short: XORQ BP, BP LEAL 1(BP)(DX*4), DX MOVB CL, 1(AX) SARL $0x08, CX SHLL $0x05, CX ORL CX, DX MOVB DL, (AX) ADDQ $0x02, BX ADDQ $0x02, AX JMP gen_emit_copy_end JMP two_byte_offset_standalone two_byte_offset_short_standalone: CMPL DX, $0x0c JGE emit_copy_three_standalone CMPL CX, $0x00000800 JGE emit_copy_three_standalone MOVB $0x01, BP LEAL -16(BP)(DX*4), DX MOVB CL, 1(AX) SHRL $0x08, CX SHLL $0x05, CX ORL CX, DX MOVB DL, (AX) ADDQ $0x02, BX ADDQ $0x02, AX JMP gen_emit_copy_end emit_copy_three_standalone: MOVB $0x02, BP LEAL -4(BP)(DX*4), DX MOVB DL, (AX) MOVW CX, 1(AX) ADDQ $0x03, BX ADDQ $0x03, AX gen_emit_copy_end: MOVQ BX, ret+40(FP) RET // func emitCopyNoRepeat(dst []byte, offset int, length int) int TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48 XORQ BX, BX MOVQ dst_base+0(FP), AX MOVQ offset+24(FP), CX MOVQ length+32(FP), DX CMPL CX, $0x00010000 JL two_byte_offset_standalone_snappy four_bytes_loop_back_standalone_snappy: CMPL DX, $0x40 JLE four_bytes_remain_standalone_snappy MOVB $0xff, (AX) MOVL CX, 1(AX) LEAL -64(DX), DX ADDQ $0x05, BX ADDQ $0x05, AX CMPL DX, $0x04 JL four_bytes_remain_standalone_snappy JMP four_bytes_loop_back_standalone_snappy four_bytes_remain_standalone_snappy: TESTL DX, DX JZ gen_emit_copy_end_snappy MOVB $0x03, BP LEAL -4(BP)(DX*4), DX MOVB DL, (AX) MOVL CX, 1(AX) ADDQ $0x05, BX ADDQ $0x05, AX JMP 
gen_emit_copy_end_snappy two_byte_offset_standalone_snappy: CMPL DX, $0x40 JLE two_byte_offset_short_standalone_snappy MOVB $0xee, (AX) MOVW CX, 1(AX) LEAL -60(DX), DX ADDQ $0x03, AX ADDQ $0x03, BX JMP two_byte_offset_standalone_snappy two_byte_offset_short_standalone_snappy: CMPL DX, $0x0c JGE emit_copy_three_standalone_snappy CMPL CX, $0x00000800 JGE emit_copy_three_standalone_snappy MOVB $0x01, BP LEAL -16(BP)(DX*4), DX MOVB CL, 1(AX) SHRL $0x08, CX SHLL $0x05, CX ORL CX, DX MOVB DL, (AX) ADDQ $0x02, BX ADDQ $0x02, AX JMP gen_emit_copy_end_snappy emit_copy_three_standalone_snappy: MOVB $0x02, BP LEAL -4(BP)(DX*4), DX MOVB DL, (AX) MOVW CX, 1(AX) ADDQ $0x03, BX ADDQ $0x03, AX gen_emit_copy_end_snappy: MOVQ BX, ret+40(FP) RET // func matchLen(a []byte, b []byte) int TEXT ·matchLen(SB), NOSPLIT, $0-56 MOVQ a_base+0(FP), AX MOVQ b_base+24(FP), CX MOVQ a_len+8(FP), DX XORL BP, BP CMPL DX, $0x08 JL matchlen_single_standalone matchlen_loopback_standalone: MOVQ (AX)(BP*1), BX XORQ (CX)(BP*1), BX TESTQ BX, BX JZ matchlen_loop_standalone BSFQ BX, BX SARQ $0x03, BX LEAL (BP)(BX*1), BP JMP gen_match_len_end matchlen_loop_standalone: LEAL -8(DX), DX LEAL 8(BP), BP CMPL DX, $0x08 JGE matchlen_loopback_standalone matchlen_single_standalone: TESTL DX, DX JZ gen_match_len_end matchlen_single_loopback_standalone: MOVB (AX)(BP*1), BL CMPB (CX)(BP*1), BL JNE gen_match_len_end LEAL 1(BP), BP DECL DX JNZ matchlen_single_loopback_standalone gen_match_len_end: MOVQ BP, ret+48(FP) RET