Browse Source

salsa20/salsa: eliminate unnecessary "callee save" prologue/epilogue

SP offsets were adjusted to fill in the now unused callee save area
using the following Python script:

import sys, re
def adj(m):
    """Return the "<offset>(SP)" operand for match *m* with its offset relocated.

    *m* is a regex match whose first group is the decimal SP offset.
    Offsets at or above 408 are lowered by 56 (408 - 352) so they land in
    the area starting at 352; smaller offsets are returned unchanged.
    """
    offset = int(m.group(1))
    # Only operands that sat above the removed save area need to move down.
    shift = (408 - 352) if offset >= 408 else 0
    return "%d(SP)" % (offset - shift)
# Filter stdin to stdout, passing every "<digits>(SP)" operand through adj.
source_text = sys.stdin.read()
sys.stdout.write(re.sub(r"(\d+)\(SP\)", adj, source_text))

Change-Id: I06675a75d89e5834f804df595868fe4bb8976719
Reviewed-on: https://go-review.googlesource.com/31587
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Adam Langley <agl@golang.org>
Austin Clements 9 năm trước
mục cha
commit
1705134e1b
1 tập tin đã thay đổi với 11 bổ sung và 24 xóa
  1. 11 24
      salsa20/salsa/salsa2020_amd64.s

+ 11 - 24
salsa20/salsa/salsa2020_amd64.s

@@ -8,26 +8,20 @@
 // domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
 // domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
 
 
 // func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
 // func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
-TEXT ·salsa2020XORKeyStream(SB),0,$512-40
+// This needs up to 64 bytes at 360(SP); hence the non-obvious frame size.
+TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	MOVQ out+0(FP),DI
 	MOVQ out+0(FP),DI
 	MOVQ in+8(FP),SI
 	MOVQ in+8(FP),SI
 	MOVQ n+16(FP),DX
 	MOVQ n+16(FP),DX
 	MOVQ nonce+24(FP),CX
 	MOVQ nonce+24(FP),CX
 	MOVQ key+32(FP),R8
 	MOVQ key+32(FP),R8
 
 
-	MOVQ SP,R11
+	MOVQ SP,R12
 	MOVQ SP,R9
 	MOVQ SP,R9
 	ADDQ $31, R9
 	ADDQ $31, R9
 	ANDQ $~31, R9
 	ANDQ $~31, R9
 	MOVQ R9, SP
 	MOVQ R9, SP
 
 
-	MOVQ R11,352(SP)
-	MOVQ R12,360(SP)
-	MOVQ R13,368(SP)
-	MOVQ R14,376(SP)
-	MOVQ R15,384(SP)
-	MOVQ BX,392(SP)
-	MOVQ BP,400(SP)
 	MOVQ DX,R9
 	MOVQ DX,R9
 	MOVQ CX,DX
 	MOVQ CX,DX
 	MOVQ R8,R10
 	MOVQ R8,R10
@@ -133,7 +127,7 @@ TEXT ·salsa2020XORKeyStream(SB),0,$512-40
 	SHRQ $32,CX
 	SHRQ $32,CX
 	MOVL DX,16(SP)
 	MOVL DX,16(SP)
 	MOVL CX, 36 (SP)
 	MOVL CX, 36 (SP)
-	MOVQ R9,408(SP)
+	MOVQ R9,352(SP)
 	MOVQ $20,DX
 	MOVQ $20,DX
 	MOVOA 64(SP),X0
 	MOVOA 64(SP),X0
 	MOVOA 80(SP),X1
 	MOVOA 80(SP),X1
@@ -650,7 +644,7 @@ TEXT ·salsa2020XORKeyStream(SB),0,$512-40
 	MOVL CX,244(DI)
 	MOVL CX,244(DI)
 	MOVL R8,248(DI)
 	MOVL R8,248(DI)
 	MOVL R9,252(DI)
 	MOVL R9,252(DI)
-	MOVQ 408(SP),R9
+	MOVQ 352(SP),R9
 	SUBQ $256,R9
 	SUBQ $256,R9
 	ADDQ $256,SI
 	ADDQ $256,SI
 	ADDQ $256,DI
 	ADDQ $256,DI
@@ -662,13 +656,13 @@ TEXT ·salsa2020XORKeyStream(SB),0,$512-40
 	CMPQ R9,$64
 	CMPQ R9,$64
 	JAE NOCOPY
 	JAE NOCOPY
 	MOVQ DI,DX
 	MOVQ DI,DX
-	LEAQ 416(SP),DI
+	LEAQ 360(SP),DI
 	MOVQ R9,CX
 	MOVQ R9,CX
 	REP; MOVSB
 	REP; MOVSB
-	LEAQ 416(SP),DI
-	LEAQ 416(SP),SI
+	LEAQ 360(SP),DI
+	LEAQ 360(SP),SI
 	NOCOPY:
 	NOCOPY:
-	MOVQ R9,408(SP)
+	MOVQ R9,352(SP)
 	MOVOA 48(SP),X0
 	MOVOA 48(SP),X0
 	MOVOA 0(SP),X1
 	MOVOA 0(SP),X1
 	MOVOA 16(SP),X2
 	MOVOA 16(SP),X2
@@ -867,7 +861,7 @@ TEXT ·salsa2020XORKeyStream(SB),0,$512-40
 	MOVL R8,44(DI)
 	MOVL R8,44(DI)
 	MOVL R9,28(DI)
 	MOVL R9,28(DI)
 	MOVL AX,12(DI)
 	MOVL AX,12(DI)
-	MOVQ 408(SP),R9
+	MOVQ 352(SP),R9
 	MOVL 16(SP),CX
 	MOVL 16(SP),CX
 	MOVL  36 (SP),R8
 	MOVL  36 (SP),R8
 	ADDQ $1,CX
 	ADDQ $1,CX
@@ -886,14 +880,7 @@ TEXT ·salsa2020XORKeyStream(SB),0,$512-40
 	REP; MOVSB
 	REP; MOVSB
 	BYTESATLEAST64:
 	BYTESATLEAST64:
 	DONE:
 	DONE:
-	MOVQ 352(SP),R11
-	MOVQ 360(SP),R12
-	MOVQ 368(SP),R13
-	MOVQ 376(SP),R14
-	MOVQ 384(SP),R15
-	MOVQ 392(SP),BX
-	MOVQ 400(SP),BP
-	MOVQ R11,SP
+	MOVQ R12,SP
 	RET
 	RET
 	BYTESATLEAST65:
 	BYTESATLEAST65:
 	SUBQ $64,R9
 	SUBQ $64,R9