Преглед на файлове

blake2s: use proper Go frame sizes

Currently blake2s's assembly routines claim they have a zero byte
frame and manually subtract upwards of 704 bytes from the stack
pointer without cooperating with Go's ABI. As a result, these
functions may not grow the stack when necessary, leading to memory
corruption.

Fix this by using the correct stack frame sizes so the generated stack
growth prologue is correct, and aligning the SP up instead of down.

Change-Id: Ic426338c45c94a2c01d549860c2295a0ee9200bf
Reviewed-on: https://go-review.googlesource.com/31583
Reviewed-by: Adam Langley <agl@golang.org>
Reviewed-by: Andreas Auernhammer <aead@mail.de>
Reviewed-by: Minux Ma <minux@golang.org>
Austin Clements преди 9 години
родител
ревизия
5953a478da
променени са 2 файла, в които са добавени 21 реда и са изтрити 15 реда
  1. 10 6
      blake2s/blake2s_386.s
  2. 11 9
      blake2s/blake2s_amd64.s

+ 10 - 6
blake2s/blake2s_386.s

@@ -290,7 +290,7 @@ GLOBL counter<>(SB), (NOPTR+RODATA), $16
 	MOVL t, 8*4+off+576(dst)
 
 // func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
-TEXT ·hashBlocksSSE2(SB), 4, $0-24
+TEXT ·hashBlocksSSE2(SB), 4, $672-24 // frame = 656 + 16 byte alignment
 	MOVL h+0(FP), AX
 	MOVL c+4(FP), BX
 	MOVL flag+8(FP), CX
@@ -298,8 +298,10 @@ TEXT ·hashBlocksSSE2(SB), 4, $0-24
 	MOVL blocks_len+16(FP), DX
 
 	MOVL SP, BP
-	ANDL $0xFFFFFFF0, SP
-	SUBL $(16+16+640), SP
+	MOVL SP, DI
+	ADDL $15, DI
+	ANDL $~15, DI
+	MOVL DI, SP
 
 	MOVL CX, 8(SP)
 	MOVL 0(BX), CX
@@ -357,7 +359,7 @@ loop:
 	RET
 
 // func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
-TEXT ·hashBlocksSSSE3(SB), 4, $0-24
+TEXT ·hashBlocksSSSE3(SB), 4, $704-24 // frame = 688 + 16 byte alignment
 	MOVL h+0(FP), AX
 	MOVL c+4(FP), BX
 	MOVL flag+8(FP), CX
@@ -365,8 +367,10 @@ TEXT ·hashBlocksSSSE3(SB), 4, $0-24
 	MOVL blocks_len+16(FP), DX
 
 	MOVL SP, BP
-	ANDL $0xFFFFFFF0, SP
-	SUBL $(16+16+640+32), SP
+	MOVL SP, DI
+	ADDL $15, DI
+	ANDL $~15, DI
+	MOVL DI, SP
 
 	MOVL CX, 8(SP)
 	MOVL 0(BX), CX

+ 11 - 9
blake2s/blake2s_amd64.s

@@ -364,7 +364,7 @@ GLOBL counter<>(SB), (NOPTR+RODATA), $16
 	LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0); \
 	ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14)
 
-#define HASH_BLOCKS(h, c, flag, blocks_base, blocks_len, stack_size, BLAKE2s_FUNC) \
+#define HASH_BLOCKS(h, c, flag, blocks_base, blocks_len, BLAKE2s_FUNC) \
 	MOVQ  h, AX;                   \
 	MOVQ  c, BX;                   \
 	MOVL  flag, CX;                \
@@ -372,8 +372,10 @@ GLOBL counter<>(SB), (NOPTR+RODATA), $16
 	MOVQ  blocks_len, DX;          \
 	                               \
 	MOVQ  SP, BP;                  \
-	ANDQ  $0xFFFFFFFFFFFFFFF0, SP; \
-	SUBQ  $(16+16+stack_size), SP; \
+	MOVQ  SP, R9;                  \
+	ADDQ  $15, R9;                 \
+	ANDQ  $~15, R9;                \
+	MOVQ  R9, SP;                  \
 	                               \
 	MOVQ  0(BX), R9;               \
 	MOVQ  R9, 0(SP);               \
@@ -421,18 +423,18 @@ GLOBL counter<>(SB), (NOPTR+RODATA), $16
 	MOVQ  BP, SP
 
 // func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
-TEXT ·hashBlocksSSE2(SB), 4, $0-48
-	HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), 640, BLAKE2s_SSE2)
+TEXT ·hashBlocksSSE2(SB), 4, $672-48 // frame = 656 + 16 byte alignment
+	HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), BLAKE2s_SSE2)
 	RET
 
 // func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
-TEXT ·hashBlocksSSSE3(SB), 4, $0-48
-	HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), 640, BLAKE2s_SSSE3)
+TEXT ·hashBlocksSSSE3(SB), 4, $672-48 // frame = 656 + 16 byte alignment
+	HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), BLAKE2s_SSSE3)
 	RET
 
 // func hashBlocksSSE4(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
-TEXT ·hashBlocksSSE4(SB), 4, $0-48
-	HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), 0, BLAKE2s_SSE4)
+TEXT ·hashBlocksSSE4(SB), 4, $16-48 // frame = 0 + 16 byte alignment
+	HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), BLAKE2s_SSE4)
 	RET
 
 // func supportSSE4() bool