%!s(int64=5) %!d(string=hai) anos · 39768ce68f
--- a/internal/lz4block/decode_arm.s
+++ b/internal/lz4block/decode_arm.s
@@ -0,0 +1,166 @@
 
															+// +build gc
														
 
															+// +build !noasm
														
 
															+
														
 
															+#include "textflag.h"
														
 
															+
														
 
															+// Register allocation.
														
 
															+#define dst	R0
														
 
															+#define dstorig	R1
														
 
															+#define src	R2
														
 
															+#define dstend	R3
														
 
															+#define srcend	R4
														
 
															+#define match	R5	// Match address.
														
 
															+#define token	R6
														
 
															+#define len	R7	// Literal and match lengths.
														
 
															+#define offset	R5	// Match offset; overlaps with match.
														
 
															+#define tmp1	R8
														
 
															+#define tmp2	R9
														
 
															+#define tmp3	R12
														
 
															+
														
 
															+#define minMatch	$4
														
 
															+
														
 
															+// func decodeBlock(dst, src []byte) int
														
 
															+TEXT ·decodeBlock(SB), NOFRAME|NOSPLIT, $-4-28
														
 
															+	MOVW dst_ptr+0(FP),  dst
														
 
															+	MOVW dst_len+4(FP),  dstend
														
 
															+	MOVW src_ptr+12(FP), src
														
 
															+	MOVW src_len+16(FP), srcend
														
 
															+
														
 
															+	CMP $0, srcend
														
 
															+	BEQ shortSrc
														
 
															+
														
 
															+	ADD dst, dstend
														
 
															+	ADD src, srcend
														
 
															+
														
 
															+	MOVW dst, dstorig
														
 
															+
														
 
															+loop:
														
 
															+	CMP src, srcend
														
 
															+	BEQ end
														
 
															+
														
 
															+	// Read token. Extract literal length.
														
 
															+	MOVBU.P 1(src), token
														
 
															+	MOVW    token >> 4, len
														
 
															+	CMP     $15, len
														
 
															+	BNE     readLitlenDone
														
 
															+
														
 
															+readLitlenLoop:
														
 
															+	CMP     src, srcend
														
 
															+	BEQ     shortSrc
														
 
															+	MOVBU.P 1(src), tmp1
														
 
															+	ADD     tmp1, len
														
 
															+	CMP     $255, tmp1
														
 
															+	BEQ     readLitlenLoop
														
 
															+
														
 
															+readLitlenDone:
														
 
															+	CMP $0, len
														
 
															+	BEQ copyLiteralDone
														
 
															+
														
 
															+	// Bounds check dst+len and src+len.
														
 
															+	ADD dst, len, tmp1
														
 
															+	ADD src, len, tmp2
														
 
															+	CMP dstend, tmp1
														
 
															+	BHI shortDst
														
 
															+	CMP srcend, tmp2
														
 
															+	BHI shortSrc
														
 
															+
														
 
															+	// Copy literal.
														
 
															+	CMP $4, len
														
 
															+	BLO copyLiteralFinish
														
 
															+
														
 
															+	// Copy 0-3 bytes until src is aligned.
														
 
															+	TST        $1, src
														
 
															+	MOVBU.NE.P 1(src), tmp1
														
 
															+	MOVB.NE.P  tmp1, 1(dst)
														
 
															+	SUB.NE     $1, len
														
 
															+
														
 
															+	TST        $2, src
														
 
															+	MOVHU.NE.P 2(src), tmp2
														
 
															+	MOVH.NE.P  tmp2, 2(dst)
														
 
															+	SUB.NE     $2, len
														
 
															+
														
 
															+	CMP $4, len
														
 
															+	BLO copyLiteralFinish
														
 
															+
														
 
															+copyLiteralLoop:
														
 
															+	// Aligned load, unaligned write.
														
 
															+	SUB   $4, len
														
 
															+	MOVW.P 4(src), tmp1
														
 
															+	MOVW   tmp1 >>  8, tmp2
														
 
															+	MOVB   tmp2, 1(dst)
														
 
															+	MOVW   tmp1 >> 16, tmp3
														
 
															+	MOVB   tmp3, 2(dst)
														
 
															+	MOVW   tmp1 >> 24, tmp2
														
 
															+	MOVB   tmp2, 3(dst)
														
 
															+	MOVB.P tmp1, 4(dst)
														
 
															+	CMP    $4, len
														
 
															+	BHS    copyLiteralLoop
														
 
															+
														
 
															+copyLiteralFinish:
														
 
															+	// Copy remaining 0-3 bytes.
														
 
															+	TST        $2, len
														
 
															+	MOVHU.NE.P 2(src), tmp2
														
 
															+	MOVHU.NE.P tmp2, 2(dst)
														
 
															+	TST        $1, len
														
 
															+	MOVBU.NE.P 1(src), tmp1
														
 
															+	MOVBU.NE.P tmp1, 1(dst)
														
 
															+
														
 
															+copyLiteralDone:
														
 
															+	CMP src, srcend
														
 
															+	BEQ end
														
 
															+
														
 
															+	// Read offset.
														
 
															+	ADD   $2, src
														
 
															+	CMP   srcend, src
														
 
															+	BHI   shortSrc
														
 
															+	MOVHU -2(src), offset
														
 
															+	CMP   $0, offset
														
 
															+	BEQ   corrupt
														
 
															+
														
 
															+	// Read match length.
														
 
															+	AND $15, token, len
														
 
															+	CMP $15, len
														
 
															+	BNE readMatchlenDone
														
 
															+
														
 
															+readMatchlenLoop:
														
 
															+	CMP     src, srcend
														
 
															+	BEQ     shortSrc
														
 
															+	MOVBU.P 1(src), tmp1
														
 
															+	ADD     tmp1, len
														
 
															+	CMP     $255, tmp1
														
 
															+	BEQ     readMatchlenLoop
														
 
															+
														
 
															+readMatchlenDone:
														
 
															+	ADD minMatch, len
														
 
															+
														
 
															+	ADD dst, len, tmp1
														
 
															+	CMP dstend, tmp1
														
 
															+	BHI shortDst
														
 
															+
														
 
															+	SUB offset, dst, match
														
 
															+	CMP dstorig, match
														
 
															+	BLO corrupt
														
 
															+
														
 
															+copyMatch:
														
 
															+	// Simple byte-at-a-time copy.
														
 
															+	SUB.S   $1, len
														
 
															+	MOVBU.P 1(match), tmp2
														
 
															+	MOVB.P  tmp2, 1(dst)
														
 
															+	BNE     copyMatch
														
 
															+
														
 
															+	B loop
														
 
															+
														
 
															+	// The three error cases have distinct labels so we can put different
														
 
															+	// return codes here when debugging, or if the error returns need to
														
 
															+	// be changed.
														
 
															+shortDst:
														
 
															+shortSrc:
														
 
															+corrupt:
														
 
															+	MOVW $-1, tmp1
														
 
															+	MOVW tmp1, ret+24(FP)
														
 
															+	RET
														
 
															+
														
 
															+end:
														
 
															+	SUB  dstorig, dst, tmp1
														
 
															+	MOVW tmp1, ret+24(FP)
														
 
															+	RET
														
--- a/internal/lz4block/decode_amd64.go
+++ b/internal/lz4block/decode_amd64.go
@@ -1,3 +1,4 @@
 
															+// +build amd64 arm
														
 
															 // +build !appengine
														
 
															 // +build gc
														
 
															 // +build !noasm
														
--- a/internal/lz4block/decode_other.go
+++ b/internal/lz4block/decode_other.go
@@ -1,4 +1,4 @@
 
															-// +build !amd64 appengine !gc noasm
														
 
															+// +build !amd64,!arm appengine !gc noasm
														
 
															 package lz4block