瀏覽代碼

poly1305: implement a subset of the hash.Hash interface

This CL adds the poly1305.MAC type which implements a
subset of the hash.Hash interface. With MAC it is possible
to compute an authentication tag of data without copying
it into a single byte slice.

This commit modifies the reference/generic implementation and the
AMD64 assembly implementation to support an io.Writer interface;
the ARM and s390x implementations are left unchanged.

Updates golang/go#25219

Change-Id: I7ee5a9eadd43387cf3cd887d734c625575eee47d
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/111335
Run-TryBot: Filippo Valsorda <filippo@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Andreas Auernhammer 7 年之前
父節點
當前提交
c2843e01d9
共有 7 個文件被更改,包括 322 次插入和 92 次删除
  1. 11 0
      poly1305/mac_noasm.go
  2. 65 15
      poly1305/poly1305.go
  3. 71 6
      poly1305/poly1305_test.go
  4. 52 6
      poly1305/sum_amd64.go
  5. 43 20
      poly1305/sum_amd64.s
  6. 77 44
      poly1305/sum_generic.go
  7. 3 1
      poly1305/sum_noasm.go

+ 11 - 0
poly1305/mac_noasm.go

@@ -0,0 +1,11 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 gccgo appengine
+
+package poly1305
+
+type mac struct{ macGeneric } // used where the amd64 assembly is not built; embeds the generic implementation
+
+func newMAC(key *[32]byte) mac { return mac{newMACGeneric(key)} } // wrap the generic state derived from key

+ 65 - 15
poly1305/poly1305.go

@@ -2,21 +2,19 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-/*
-Package poly1305 implements Poly1305 one-time message authentication code as
-specified in https://cr.yp.to/mac/poly1305-20050329.pdf.
-
-Poly1305 is a fast, one-time authentication function. It is infeasible for an
-attacker to generate an authenticator for a message without the key. However, a
-key must only be used for a single message. Authenticating two different
-messages with the same key allows an attacker to forge authenticators for other
-messages with the same key.
-
-Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was
-used with a fixed key in order to generate one-time keys from an nonce.
-However, in this package AES isn't used and the one-time key is specified
-directly.
-*/
+// Package poly1305 implements Poly1305 one-time message authentication code as
+// specified in https://cr.yp.to/mac/poly1305-20050329.pdf.
+//
+// Poly1305 is a fast, one-time authentication function. It is infeasible for an
+// attacker to generate an authenticator for a message without the key. However, a
+// key must only be used for a single message. Authenticating two different
+// messages with the same key allows an attacker to forge authenticators for other
+// messages with the same key.
+//
+// Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was
+// used with a fixed key in order to generate one-time keys from a nonce.
+// However, in this package AES isn't used and the one-time key is specified
+// directly.
 package poly1305 // import "golang.org/x/crypto/poly1305"
 
 import "crypto/subtle"
@@ -31,3 +29,55 @@ func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
 	Sum(&tmp, m, key)
 	return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1
 }
+
+// New returns a new MAC computing an authentication
+// tag of all data written to it with the given key.
+// This allows writing the message progressively instead
+// of passing it as a single slice. Common users should use
+// the Sum function instead.
+//
+// The key must be unique for each message, as authenticating
+// two different messages with the same key allows an attacker
+// to forge messages at will.
+func New(key *[32]byte) *MAC {
+	return &MAC{
+		mac:       newMAC(key), // platform-dependent state initialized from key
+		finalized: false, // no tag produced yet, so Write is still allowed
+	}
+}
+
+// MAC is an io.Writer computing an authentication tag
+// of the data written to it.
+//
+// MAC cannot be used like common hash.Hash implementations,
+// because using a poly1305 key twice breaks its security.
+// Therefore writing data to a running MAC after calling
+// Sum causes it to panic.
+type MAC struct {
+	mac // platform-dependent implementation
+
+	finalized bool // set by Sum; once set, Write panics
+}
+
+// Size returns the number of bytes Sum appends to its argument, which is TagSize.
+func (h *MAC) Size() int { return TagSize }
+
+// Write adds more data to the running message authentication code.
+// It never returns an error.
+//
+// It panics if it is called after the first call of Sum.
+func (h *MAC) Write(p []byte) (n int, err error) {
+	if h.finalized { // Sum has already been called on this MAC
+		panic("poly1305: write to MAC after Sum")
+	}
+	return h.mac.Write(p) // delegate to the platform-dependent implementation
+}
+
+// Sum appends the authenticator of all data written so far to b and
+// returns the resulting slice. Once Sum has been called, Write panics.
+func (h *MAC) Sum(b []byte) []byte {
+	var mac [TagSize]byte // scratch tag; this local shadows the embedded type name mac
+	h.mac.Sum(&mac)
+	h.finalized = true // from now on Write panics
+	return append(b, mac[:]...)
+}

+ 71 - 6
poly1305/poly1305_test.go

@@ -100,7 +100,50 @@ func TestSumUnaligned(t *testing.T)        { testSum(t, true, Sum) }
 func TestSumGeneric(t *testing.T)          { testSum(t, false, sumGeneric) }
 func TestSumGenericUnaligned(t *testing.T) { testSum(t, true, sumGeneric) }
 
-func benchmark(b *testing.B, size int, unaligned bool) {
+func TestWriteGeneric(t *testing.T)          { testWriteGeneric(t, false) }
+func TestWriteGenericUnaligned(t *testing.T) { testWriteGeneric(t, true) }
+func TestWrite(t *testing.T)                 { testWrite(t, false) }
+func TestWriteUnaligned(t *testing.T)        { testWrite(t, true) }
+
+func testWriteGeneric(t *testing.T, unaligned bool) {
+	for i, v := range testData {
+		key := v.Key()
+		input := v.Input()
+		var out [16]byte
+
+		if unaligned {
+			input = unalignBytes(input)
+		}
+		h := newMACGeneric(&key) // exercise the generic implementation directly
+		h.Write(input[:len(input)/2]) // split the message across two Write calls
+		h.Write(input[len(input)/2:])
+		h.Sum(&out)
+		if tag := v.Tag(); out != tag {
+			t.Errorf("%d: expected %x, got %x", i, tag[:], out[:])
+		}
+	}
+}
+
+func testWrite(t *testing.T, unaligned bool) {
+	for i, v := range testData {
+		key := v.Key()
+		input := v.Input()
+		var out [16]byte
+
+		if unaligned {
+			input = unalignBytes(input)
+		}
+		h := New(&key) // exercise the exported MAC type
+		h.Write(input[:len(input)/2]) // split the message across two Write calls
+		h.Write(input[len(input)/2:])
+		h.Sum(out[:0]) // zero-length slice over out: the appended 16-byte tag lands in out's backing array
+		if tag := v.Tag(); out != tag {
+			t.Errorf("%d: expected %x, got %x", i, tag[:], out[:])
+		}
+	}
+}
+
+func benchmarkSum(b *testing.B, size int, unaligned bool) {
 	var out [16]byte
 	var key [32]byte
 	in := make([]byte, size)
@@ -114,11 +157,33 @@ func benchmark(b *testing.B, size int, unaligned bool) {
 	}
 }
 
-func Benchmark64(b *testing.B)          { benchmark(b, 64, false) }
-func Benchmark1K(b *testing.B)          { benchmark(b, 1024, false) }
-func Benchmark64Unaligned(b *testing.B) { benchmark(b, 64, true) }
-func Benchmark1KUnaligned(b *testing.B) { benchmark(b, 1024, true) }
-func Benchmark2M(b *testing.B)          { benchmark(b, 2097152, true) }
+func benchmarkWrite(b *testing.B, size int, unaligned bool) {
+	var key [32]byte
+	h := New(&key) // one MAC is reused for every iteration
+	in := make([]byte, size)
+	if unaligned {
+		in = unalignBytes(in)
+	}
+	b.SetBytes(int64(len(in)))
+	b.ResetTimer() // exclude the setup above from the measurement
+	for i := 0; i < b.N; i++ {
+		h.Write(in) // only Write is measured; Sum is never called, so the MAC is never finalized
+	}
+}
+
+func Benchmark64(b *testing.B)          { benchmarkSum(b, 64, false) }
+func Benchmark1K(b *testing.B)          { benchmarkSum(b, 1024, false) }
+func Benchmark2M(b *testing.B)          { benchmarkSum(b, 2*1024*1024, false) }
+func Benchmark64Unaligned(b *testing.B) { benchmarkSum(b, 64, true) }
+func Benchmark1KUnaligned(b *testing.B) { benchmarkSum(b, 1024, true) }
+func Benchmark2MUnaligned(b *testing.B) { benchmarkSum(b, 2*1024*1024, true) }
+
+func BenchmarkWrite64(b *testing.B)          { benchmarkWrite(b, 64, false) }
+func BenchmarkWrite1K(b *testing.B)          { benchmarkWrite(b, 1024, false) }
+func BenchmarkWrite2M(b *testing.B)          { benchmarkWrite(b, 2*1024*1024, false) }
+func BenchmarkWrite64Unaligned(b *testing.B) { benchmarkWrite(b, 64, true) }
+func BenchmarkWrite1KUnaligned(b *testing.B) { benchmarkWrite(b, 1024, true) }
+func BenchmarkWrite2MUnaligned(b *testing.B) { benchmarkWrite(b, 2*1024*1024, true) }
 
 func unalignBytes(in []byte) []byte {
 	out := make([]byte, len(in)+1)

+ 52 - 6
poly1305/sum_amd64.go

@@ -6,17 +6,63 @@
 
 package poly1305
 
-// This function is implemented in sum_amd64.s
 //go:noescape
-func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
+func initialize(state *[7]uint64, key *[32]byte) // in sum_amd64.s: stores the masked first key half and the second key half into state[3..6]
+
+//go:noescape
+func update(state *[7]uint64, msg []byte) // in sum_amd64.s: processes msg and writes the new h0, h1, h2 back to state[0..2]
+
+//go:noescape
+func finalize(tag *[TagSize]byte, state *[7]uint64) // in sum_amd64.s: computes the tag from state and writes it to tag
 
 // Sum generates an authenticator for m using a one-time key and puts the
 // 16-byte result into out. Authenticating two different messages with the same
 // key allows an attacker to forge messages at will.
 func Sum(out *[16]byte, m []byte, key *[32]byte) {
-	var mPtr *byte
-	if len(m) > 0 {
-		mPtr = &m[0]
+	h := newMAC(key) // one-shot use of the incremental mac: initialize, write m once, produce the tag
+	h.Write(m)
+	h.Sum(out)
+}
+
+func newMAC(key *[32]byte) (h mac) {
+	initialize(&h.state, key) // h is the zero-valued named result; the assembly fills state[3..6] from key
+	return
+}
+
+type mac struct {
+	state [7]uint64 // layout: h0, h1, h2 (accumulator), r0, r1 (masked key[0:16]), pad0, pad1 (key[16:32])
+
+	buffer [TagSize]byte // holds a partial block (fewer than TagSize bytes) between Write calls
+	offset int // number of valid bytes currently in buffer
+}
+
+func (h *mac) Write(p []byte) (n int, err error) {
+	n = len(p)
+	if h.offset > 0 { // a partial block is pending from an earlier Write
+		remaining := TagSize - h.offset
+		if n < remaining { // p does not complete the pending block: buffer it and return
+			h.offset += copy(h.buffer[h.offset:], p)
+			return n, nil
+		}
+		copy(h.buffer[h.offset:], p[:remaining]) // top the buffer up to one full block
+		p = p[remaining:]
+		h.offset = 0
+		update(&h.state, h.buffer[:])
+	}
+	if nn := len(p) - (len(p) % TagSize); nn > 0 { // nn is the length of the whole-block prefix of p
+		update(&h.state, p[:nn])
+		p = p[nn:]
+	}
+	if len(p) > 0 { // keep the trailing partial block for the next Write or for Sum
+		h.offset += copy(h.buffer[h.offset:], p)
+	}
+	return n, nil
+}
+
+func (h *mac) Sum(out *[16]byte) {
+	state := h.state // copy, so the update below does not modify h.state
+	if h.offset > 0 {
+		update(&state, h.buffer[:h.offset]) // process the buffered partial block (fewer than TagSize bytes)
 	}
-	poly1305(out, mPtr, uint64(len(m)), key)
+	finalize(out, &state)
 }

+ 43 - 20
poly1305/sum_amd64.s

@@ -58,20 +58,17 @@ DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
 DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
 GLOBL ·poly1305Mask<>(SB), RODATA, $16
 
-// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key)
-TEXT ·poly1305(SB), $0-32
-	MOVQ out+0(FP), DI
-	MOVQ m+8(FP), SI
-	MOVQ mlen+16(FP), R15
-	MOVQ key+24(FP), AX
-
-	MOVQ 0(AX), R11
-	MOVQ 8(AX), R12
-	ANDQ ·poly1305Mask<>(SB), R11   // r0
-	ANDQ ·poly1305Mask<>+8(SB), R12 // r1
-	XORQ R8, R8                    // h0
-	XORQ R9, R9                    // h1
-	XORQ R10, R10                  // h2
+// func update(state *[7]uint64, msg []byte)
+TEXT ·update(SB), $0-32
+	MOVQ state+0(FP), DI
+	MOVQ msg_base+8(FP), SI
+	MOVQ msg_len+16(FP), R15
+
+	MOVQ 0(DI), R8   // h0
+	MOVQ 8(DI), R9   // h1
+	MOVQ 16(DI), R10 // h2
+	MOVQ 24(DI), R11 // r0
+	MOVQ 32(DI), R12 // r1
 
 	CMPQ R15, $16
 	JB   bytes_between_0_and_15
@@ -109,16 +106,42 @@ flush_buffer:
 	JMP  multiply
 
 done:
-	MOVQ    R8, AX
-	MOVQ    R9, BX
+	MOVQ R8, 0(DI)
+	MOVQ R9, 8(DI)
+	MOVQ R10, 16(DI)
+	RET
+
+// func initialize(state *[7]uint64, key *[32]byte)
+TEXT ·initialize(SB), $0-16
+	MOVQ state+0(FP), DI
+	MOVQ key+8(FP), SI
+
+	// state[0..2] (h0, h1, h2) is not written here; newMAC passes a zero-valued state
+	MOVOU 0(SI), X0 // key[0:16]
+	MOVOU 16(SI), X1 // key[16:32]
+	MOVOU ·poly1305Mask<>(SB), X2
+	PAND  X2, X0 // apply poly1305Mask to key[0:16], giving r0 and r1
+	MOVOU X0, 24(DI) // state[3], state[4] = r0, r1
+	MOVOU X1, 40(DI) // state[5], state[6] = key[16:32], later added to h by finalize
+	RET
+
+// func finalize(tag *[TagSize]byte, state *[7]uint64)
+TEXT ·finalize(SB), $0-16
+	MOVQ tag+0(FP), DI
+	MOVQ state+8(FP), SI
+
+	MOVQ    0(SI), AX
+	MOVQ    8(SI), BX
+	MOVQ    16(SI), CX
+	MOVQ    AX, R8
+	MOVQ    BX, R9
 	SUBQ    $0xFFFFFFFFFFFFFFFB, AX
 	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
-	SBBQ    $3, R10
+	SBBQ    $3, CX
 	CMOVQCS R8, AX
 	CMOVQCS R9, BX
-	MOVQ    key+24(FP), R8
-	ADDQ    16(R8), AX
-	ADCQ    24(R8), BX
+	ADDQ    40(SI), AX
+	ADCQ    48(SI), BX
 
 	MOVQ AX, 0(DI)
 	MOVQ BX, 8(DI)

+ 77 - 44
poly1305/sum_ref.go → poly1305/sum_generic.go

@@ -1,4 +1,4 @@
-// Copyright 2012 The Go Authors. All rights reserved.
+// Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
@@ -6,21 +6,79 @@ package poly1305
 
 import "encoding/binary"
 
+const (
+	msgBlock   = uint32(1 << 24) // flag for full 16-byte blocks: OR-ed into the top limb h4 by updateGeneric
+	finalBlock = uint32(0) // flag for the padded last block: Sum already stored the 1 byte in the buffer
+)
+
 // sumGeneric generates an authenticator for msg using a one-time key and
 // puts the 16-byte result into out. This is the generic implementation of
 // Sum and should be called if no assembly implementation is available.
 func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
-	var (
-		h0, h1, h2, h3, h4 uint32 // the hash accumulators
-		r0, r1, r2, r3, r4 uint64 // the r part of the key
-	)
+	h := newMACGeneric(key) // one-shot use of the incremental macGeneric: initialize, write msg once, produce the tag
+	h.Write(msg)
+	h.Sum(out)
+}
+
+func newMACGeneric(key *[32]byte) (h macGeneric) {
+	h.r[0] = binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff // r: key[0:16] split into five 26-bit limbs read at byte offsets 0, 3, 6, 9, 12
+	h.r[1] = (binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03 // masks narrower than 0x3ffffff also zero some bits of r (presumably the Poly1305 clamp; confirm against the spec)
+	h.r[2] = (binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff
+	h.r[3] = (binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff
+	h.r[4] = (binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff
+
+	h.s[0] = binary.LittleEndian.Uint32(key[16:]) // s: key[16:32] as four little-endian 32-bit words
+	h.s[1] = binary.LittleEndian.Uint32(key[20:])
+	h.s[2] = binary.LittleEndian.Uint32(key[24:])
+	h.s[3] = binary.LittleEndian.Uint32(key[28:])
+	return // h.h, buffer and offset keep their zero values
+}
+
+type macGeneric struct {
+	h, r [5]uint32 // h: hash accumulator, r: first key half; both held as five 26-bit limbs
+	s    [4]uint32 // key[16:32], added to h by finalizeGeneric to form the tag
+
+	buffer [TagSize]byte // holds a partial block (fewer than TagSize bytes) between Write calls
+	offset int // number of valid bytes currently in buffer
+}
+
+func (h *macGeneric) Write(p []byte) (n int, err error) {
+	n = len(p)
+	if h.offset > 0 { // a partial block is pending from an earlier Write
+		remaining := TagSize - h.offset
+		if n < remaining { // p does not complete the pending block: buffer it and return
+			h.offset += copy(h.buffer[h.offset:], p)
+			return n, nil
+		}
+		copy(h.buffer[h.offset:], p[:remaining]) // top the buffer up to one full block
+		p = p[remaining:]
+		h.offset = 0
+		updateGeneric(h.buffer[:], msgBlock, &(h.h), &(h.r))
+	}
+	if nn := len(p) - (len(p) % TagSize); nn > 0 { // nn is the length of the whole-block prefix of p
+		updateGeneric(p, msgBlock, &(h.h), &(h.r)) // all of p is passed; updateGeneric only consumes whole blocks
+		p = p[nn:]
+	}
+	if len(p) > 0 { // keep the trailing partial block for the next Write or for Sum
+		h.offset += copy(h.buffer[h.offset:], p)
+	}
+	return n, nil
+}
 
-	r0 = uint64(binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff)
-	r1 = uint64((binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03)
-	r2 = uint64((binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff)
-	r3 = uint64((binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff)
-	r4 = uint64((binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff)
+func (h *macGeneric) Sum(out *[16]byte) {
+	H, R := h.h, h.r // array copies, so the update below does not modify h.h or h.r
+	if h.offset > 0 {
+		var buffer [TagSize]byte
+		copy(buffer[:], h.buffer[:h.offset])
+		buffer[h.offset] = 1 // 1 byte right after the data, rest stays zero (invariant: h.offset < TagSize)
+		updateGeneric(buffer[:], finalBlock, &H, &R) // finalBlock: no extra high bit, the 1 byte above takes its place
+	}
+	finalizeGeneric(out, &H, &(h.s))
+}
 
+func updateGeneric(msg []byte, flag uint32, h, r *[5]uint32) {
+	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
+	r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4])
 	R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
 
 	for len(msg) >= TagSize {
@@ -29,7 +87,7 @@ func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
 		h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
 		h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
 		h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
-		h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | (1 << 24)
+		h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag
 
 		// h *= r
 		d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
@@ -52,36 +110,11 @@ func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
 		msg = msg[TagSize:]
 	}
 
-	if len(msg) > 0 {
-		var block [TagSize]byte
-		off := copy(block[:], msg)
-		block[off] = 0x01
-
-		// h += msg
-		h0 += binary.LittleEndian.Uint32(block[0:]) & 0x3ffffff
-		h1 += (binary.LittleEndian.Uint32(block[3:]) >> 2) & 0x3ffffff
-		h2 += (binary.LittleEndian.Uint32(block[6:]) >> 4) & 0x3ffffff
-		h3 += (binary.LittleEndian.Uint32(block[9:]) >> 6) & 0x3ffffff
-		h4 += (binary.LittleEndian.Uint32(block[12:]) >> 8)
-
-		// h *= r
-		d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
-		d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
-		d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
-		d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
-		d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
-
-		// h %= p
-		h0 = uint32(d0) & 0x3ffffff
-		h1 = uint32(d1) & 0x3ffffff
-		h2 = uint32(d2) & 0x3ffffff
-		h3 = uint32(d3) & 0x3ffffff
-		h4 = uint32(d4) & 0x3ffffff
+	h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4
+}
 
-		h0 += uint32(d4>>26) * 5
-		h1 += h0 >> 26
-		h0 = h0 & 0x3ffffff
-	}
+func finalizeGeneric(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) {
+	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
 
 	// h %= p reduction
 	h2 += h1 >> 26
@@ -123,13 +156,13 @@ func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
 
 	// s: the s part of the key
 	// tag = (h + s) % (2^128)
-	t := uint64(h0) + uint64(binary.LittleEndian.Uint32(key[16:]))
+	t := uint64(h0) + uint64(s[0])
 	h0 = uint32(t)
-	t = uint64(h1) + uint64(binary.LittleEndian.Uint32(key[20:])) + (t >> 32)
+	t = uint64(h1) + uint64(s[1]) + (t >> 32)
 	h1 = uint32(t)
-	t = uint64(h2) + uint64(binary.LittleEndian.Uint32(key[24:])) + (t >> 32)
+	t = uint64(h2) + uint64(s[2]) + (t >> 32)
 	h2 = uint32(t)
-	t = uint64(h3) + uint64(binary.LittleEndian.Uint32(key[28:])) + (t >> 32)
+	t = uint64(h3) + uint64(s[3]) + (t >> 32)
 	h3 = uint32(t)
 
 	binary.LittleEndian.PutUint32(out[0:], h0)

+ 3 - 1
poly1305/sum_noasm.go

@@ -10,5 +10,7 @@ package poly1305
 // 16-byte result into out. Authenticating two different messages with the same
 // key allows an attacker to forge messages at will.
 func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
-	sumGeneric(out, msg, key)
+	h := newMAC(key) // one-shot use of the incremental mac: initialize, write msg once, produce the tag
+	h.Write(msg)
+	h.Sum(out)
 }