Pārlūkot izejas kodu

chacha20poly1305: add XChaCha20-Poly1305

The XChaCha20 construction does not have an authoritative spec, but this
implementation is based on the following documents:

https://cr.yp.to/snuffle/xsalsa-20081128.pdf
https://download.libsodium.org/doc/secret-key_cryptography/aead.html
http://loup-vaillant.fr/tutorials/chacha20-design
https://tools.ietf.org/html/draft-paragon-paseto-rfc-00#section-7

Tested against the following implementations:

https://github.com/jedisct1/libsodium/blob/7cdf3f0e841/test/default/aead_xchacha20poly1305.c
https://git.kernel.org/pub/scm/linux/kernel/git/zx2c4/linux.git/diff/lib/zinc/selftest/chacha20poly1305.h?h=zinc
https://git.zx2c4.com/wireguard-go/tree/xchacha20poly1305/xchacha20.go

name                            time/op          speed
Chacha20Poly1305/Open-64-8         225ns ± 1%     283MB/s ± 1%
Chacha20Poly1305/Open-64-X-8       390ns ± 0%     164MB/s ± 0%
Chacha20Poly1305/Seal-64-8         222ns ± 0%     287MB/s ± 0%
Chacha20Poly1305/Seal-64-X-8       386ns ± 0%     165MB/s ± 1%
Chacha20Poly1305/Open-1350-8      1.12µs ± 1%    1.21GB/s ± 1%
Chacha20Poly1305/Open-1350-X-8    1.28µs ± 0%    1.05GB/s ± 0%
Chacha20Poly1305/Seal-1350-8      1.15µs ± 0%    1.17GB/s ± 0%
Chacha20Poly1305/Seal-1350-X-8    1.32µs ± 1%    1.02GB/s ± 0%
Chacha20Poly1305/Open-8192-8      5.53µs ± 0%    1.48GB/s ± 0%
Chacha20Poly1305/Open-8192-X-8    5.71µs ± 1%    1.44GB/s ± 1%
Chacha20Poly1305/Seal-8192-8      5.54µs ± 1%    1.48GB/s ± 1%
Chacha20Poly1305/Seal-8192-X-8    5.74µs ± 1%    1.43GB/s ± 1%

Updates golang/go#24485

Change-Id: Iea6f3b4c2be67f16f56720a200dcc895c0f9d520
Reviewed-on: https://go-review.googlesource.com/127819
Run-TryBot: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Adam Langley <agl@golang.org>
Filippo Valsorda 7 gadi atpakaļ
vecāks
revīzija
f792edd33d

+ 6 - 2
chacha20poly1305/chacha20poly1305.go

@@ -14,7 +14,11 @@ import (
 const (
 	// KeySize is the size of the key used by this AEAD, in bytes.
 	KeySize = 32
-	// NonceSize is the size of the nonce used with this AEAD, in bytes.
+	// NonceSize is the size of the nonce used with the standard variant of this
+	// AEAD, in bytes.
+	//
+	// Note that this is too short to be safely generated at random if the same
+	// key is reused more than 2³² times.
 	NonceSize = 12
 )
 
@@ -22,7 +26,7 @@ type chacha20poly1305 struct {
 	key [8]uint32
 }
 
-// New returns a ChaCha20-Poly1305 AEAD that uses the given, 256-bit key.
+// New returns a ChaCha20-Poly1305 AEAD that uses the given 256-bit key.
 func New(key []byte) (cipher.AEAD, error) {
 	if len(key) != KeySize {
 		return nil, errors.New("chacha20poly1305: bad key length")

+ 111 - 74
chacha20poly1305/chacha20poly1305_test.go

@@ -6,9 +6,11 @@ package chacha20poly1305
 
 import (
 	"bytes"
+	"crypto/cipher"
 	cr "crypto/rand"
 	"encoding/hex"
 	mr "math/rand"
+	"strconv"
 	"testing"
 )
 
@@ -19,7 +21,18 @@ func TestVectors(t *testing.T) {
 		ad, _ := hex.DecodeString(test.aad)
 		plaintext, _ := hex.DecodeString(test.plaintext)
 
-		aead, err := New(key)
+		var (
+			aead cipher.AEAD
+			err  error
+		)
+		switch len(nonce) {
+		case NonceSize:
+			aead, err = New(key)
+		case NonceSizeX:
+			aead, err = NewX(key)
+		default:
+			t.Fatalf("#%d: wrong nonce length: %d", i, len(nonce))
+		}
 		if err != nil {
 			t.Fatal(err)
 		}
@@ -68,87 +81,117 @@ func TestVectors(t *testing.T) {
 
 func TestRandom(t *testing.T) {
 	// Some random tests to verify Open(Seal) == Plaintext
-	for i := 0; i < 256; i++ {
-		var nonce [12]byte
-		var key [32]byte
-
-		al := mr.Intn(128)
-		pl := mr.Intn(16384)
-		ad := make([]byte, al)
-		plaintext := make([]byte, pl)
-		cr.Read(key[:])
-		cr.Read(nonce[:])
-		cr.Read(ad)
-		cr.Read(plaintext)
-
-		aead, err := New(key[:])
-		if err != nil {
-			t.Fatal(err)
-		}
+	f := func(t *testing.T, nonceSize int) {
+		for i := 0; i < 256; i++ {
+			var nonce = make([]byte, nonceSize)
+			var key [32]byte
+
+			al := mr.Intn(128)
+			pl := mr.Intn(16384)
+			ad := make([]byte, al)
+			plaintext := make([]byte, pl)
+			cr.Read(key[:])
+			cr.Read(nonce[:])
+			cr.Read(ad)
+			cr.Read(plaintext)
+
+			var (
+				aead cipher.AEAD
+				err  error
+			)
+			switch len(nonce) {
+			case NonceSize:
+				aead, err = New(key[:])
+			case NonceSizeX:
+				aead, err = NewX(key[:])
+			default:
+				t.Fatalf("#%d: wrong nonce length: %d", i, len(nonce))
+			}
+			if err != nil {
+				t.Fatal(err)
+			}
 
-		ct := aead.Seal(nil, nonce[:], plaintext, ad)
+			ct := aead.Seal(nil, nonce[:], plaintext, ad)
 
-		plaintext2, err := aead.Open(nil, nonce[:], ct, ad)
-		if err != nil {
-			t.Errorf("Random #%d: Open failed", i)
-			continue
-		}
+			plaintext2, err := aead.Open(nil, nonce[:], ct, ad)
+			if err != nil {
+				t.Errorf("Random #%d: Open failed", i)
+				continue
+			}
 
-		if !bytes.Equal(plaintext, plaintext2) {
-			t.Errorf("Random #%d: plaintext's don't match: got %x vs %x", i, plaintext2, plaintext)
-			continue
-		}
+			if !bytes.Equal(plaintext, plaintext2) {
+				t.Errorf("Random #%d: plaintext's don't match: got %x vs %x", i, plaintext2, plaintext)
+				continue
+			}
 
-		if len(ad) > 0 {
-			alterAdIdx := mr.Intn(len(ad))
-			ad[alterAdIdx] ^= 0x80
-			if _, err := aead.Open(nil, nonce[:], ct, ad); err == nil {
-				t.Errorf("Random #%d: Open was successful after altering additional data", i)
+			if len(ad) > 0 {
+				alterAdIdx := mr.Intn(len(ad))
+				ad[alterAdIdx] ^= 0x80
+				if _, err := aead.Open(nil, nonce[:], ct, ad); err == nil {
+					t.Errorf("Random #%d: Open was successful after altering additional data", i)
+				}
+				ad[alterAdIdx] ^= 0x80
 			}
-			ad[alterAdIdx] ^= 0x80
-		}
 
-		alterNonceIdx := mr.Intn(aead.NonceSize())
-		nonce[alterNonceIdx] ^= 0x80
-		if _, err := aead.Open(nil, nonce[:], ct, ad); err == nil {
-			t.Errorf("Random #%d: Open was successful after altering nonce", i)
-		}
-		nonce[alterNonceIdx] ^= 0x80
+			alterNonceIdx := mr.Intn(aead.NonceSize())
+			nonce[alterNonceIdx] ^= 0x80
+			if _, err := aead.Open(nil, nonce[:], ct, ad); err == nil {
+				t.Errorf("Random #%d: Open was successful after altering nonce", i)
+			}
+			nonce[alterNonceIdx] ^= 0x80
 
-		alterCtIdx := mr.Intn(len(ct))
-		ct[alterCtIdx] ^= 0x80
-		if _, err := aead.Open(nil, nonce[:], ct, ad); err == nil {
-			t.Errorf("Random #%d: Open was successful after altering ciphertext", i)
+			alterCtIdx := mr.Intn(len(ct))
+			ct[alterCtIdx] ^= 0x80
+			if _, err := aead.Open(nil, nonce[:], ct, ad); err == nil {
+				t.Errorf("Random #%d: Open was successful after altering ciphertext", i)
+			}
+			ct[alterCtIdx] ^= 0x80
 		}
-		ct[alterCtIdx] ^= 0x80
 	}
+	t.Run("Standard", func(t *testing.T) { f(t, NonceSize) })
+	t.Run("X", func(t *testing.T) { f(t, NonceSizeX) })
 }
 
-func benchamarkChaCha20Poly1305Seal(b *testing.B, buf []byte) {
+func benchamarkChaCha20Poly1305Seal(b *testing.B, buf []byte, nonceSize int) {
+	b.ReportAllocs()
 	b.SetBytes(int64(len(buf)))
 
 	var key [32]byte
-	var nonce [12]byte
+	var nonce = make([]byte, nonceSize)
 	var ad [13]byte
 	var out []byte
 
-	aead, _ := New(key[:])
+	var aead cipher.AEAD
+	switch len(nonce) {
+	case NonceSize:
+		aead, _ = New(key[:])
+	case NonceSizeX:
+		aead, _ = NewX(key[:])
+	}
+
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		out = aead.Seal(out[:0], nonce[:], buf[:], ad[:])
 	}
 }
 
-func benchamarkChaCha20Poly1305Open(b *testing.B, buf []byte) {
+func benchamarkChaCha20Poly1305Open(b *testing.B, buf []byte, nonceSize int) {
+	b.ReportAllocs()
 	b.SetBytes(int64(len(buf)))
 
 	var key [32]byte
-	var nonce [12]byte
+	var nonce = make([]byte, nonceSize)
 	var ad [13]byte
 	var ct []byte
 	var out []byte
 
-	aead, _ := New(key[:])
+	var aead cipher.AEAD
+	switch len(nonce) {
+	case NonceSize:
+		aead, _ = New(key[:])
+	case NonceSizeX:
+		aead, _ = NewX(key[:])
+	}
 	ct = aead.Seal(ct[:0], nonce[:], buf[:], ad[:])
 
 	b.ResetTimer()
@@ -157,26 +200,20 @@ func benchamarkChaCha20Poly1305Open(b *testing.B, buf []byte) {
 	}
 }
 
-func BenchmarkChacha20Poly1305Open_64(b *testing.B) {
-	benchamarkChaCha20Poly1305Open(b, make([]byte, 64))
-}
-
-func BenchmarkChacha20Poly1305Seal_64(b *testing.B) {
-	benchamarkChaCha20Poly1305Seal(b, make([]byte, 64))
-}
-
-func BenchmarkChacha20Poly1305Open_1350(b *testing.B) {
-	benchamarkChaCha20Poly1305Open(b, make([]byte, 1350))
-}
-
-func BenchmarkChacha20Poly1305Seal_1350(b *testing.B) {
-	benchamarkChaCha20Poly1305Seal(b, make([]byte, 1350))
-}
-
-func BenchmarkChacha20Poly1305Open_8K(b *testing.B) {
-	benchamarkChaCha20Poly1305Open(b, make([]byte, 8*1024))
-}
-
-func BenchmarkChacha20Poly1305Seal_8K(b *testing.B) {
-	benchamarkChaCha20Poly1305Seal(b, make([]byte, 8*1024))
+func BenchmarkChacha20Poly1305(b *testing.B) { // runs Open and Seal sub-benchmarks for both nonce sizes
+	for _, length := range []int{64, 1350, 8 * 1024} { // message sizes in bytes
+		b.Run("Open-"+strconv.Itoa(length), func(b *testing.B) { // 12-byte nonce (NonceSize) variant
+			benchamarkChaCha20Poly1305Open(b, make([]byte, length), NonceSize)
+		})
+		b.Run("Seal-"+strconv.Itoa(length), func(b *testing.B) {
+			benchamarkChaCha20Poly1305Seal(b, make([]byte, length), NonceSize)
+		})
+
+		b.Run("Open-"+strconv.Itoa(length)+"-X", func(b *testing.B) { // the "-X" suffix marks the 24-byte nonce (NonceSizeX) variant
+			benchamarkChaCha20Poly1305Open(b, make([]byte, length), NonceSizeX)
+		})
+		b.Run("Seal-"+strconv.Itoa(length)+"-X", func(b *testing.B) {
+			benchamarkChaCha20Poly1305Seal(b, make([]byte, length), NonceSizeX)
+		})
+	}
 }

+ 23 - 0
chacha20poly1305/chacha20poly1305_vectors_test.go

@@ -336,4 +336,27 @@ var chacha20Poly1305Tests = []struct {
 		"129039b5572e8a7a8131f76a",
 		"2c125232a59879aee36cacc4aca5085a4688c4f776667a8fbd86862b5cfb1d57c976688fdd652eafa2b88b1b8e358aa2110ff6ef13cdc1ceca9c9f087c35c38d89d6fbd8de89538070f17916ecb19ca3ef4a1c834f0bdaa1df62aaabef2e117106787056c909e61ecd208357dd5c363f11c5d6cf24992cc873cf69f59360a820fcf290bd90b2cab24c47286acb4e1033962b6d41e562a206a94796a8ab1c6b8bade804ff9bdf5ba6062d2c1f8fe0f4dfc05720bd9a612b92c26789f9f6a7ce43f5e8e3aee99a9cd7d6c11eaa611983c36935b0dda57d898a60a0ab7c4b54",
 	},
+
+	// XChaCha20-Poly1305 vectors
+	{
+		"000000000000000000000000000000",
+		"",
+		"0000000000000000000000000000000000000000000000000000000000000000",
+		"000000000000000000000000000000000000000000000000",
+		"789e9689e5208d7fd9e1f3c5b5341fb2f7033812ac9ebd3745e2c99c7bbfeb",
+	},
+	{
+		"02dc819b71875e49f5e1e5a768141cfd3f14307ae61a34d81decd9a3367c00c7",
+		"",
+		"b7bbfe61b8041658ddc95d5cbdc01bbe7626d24f3a043b70ddee87541234cff7",
+		"e293239d4c0a07840c5f83cb515be7fd59c333933027e99c",
+		"7a51f271bd2e547943c7be3316c05519a5d16803712289aa2369950b1504dd8267222e47b13280077ecada7b8795d535",
+	},
+	{
+		"7afc5f3f24155002e17dc176a8f1f3a097ff5a991b02ff4640f70b90db0c15c328b696d6998ea7988edfe3b960e47824e4ae002fbe589be57896a9b7bf5578599c6ba0153c7c",
+		"d499bb9758debe59a93783c61974b7",
+		"4ea8fab44a07f7ffc0329b2c2f8f994efdb6d505aec32113ae324def5d929ba1",
+		"404d5086271c58bf27b0352a205d21ce4367d7b6a7628961",
+		"26d2b46ad58b6988e2dcf1d09ba8ab6f532dc7e0847cdbc0ed00284225c02bbdb278ee8381ebd127a06926107d1b731cfb1521b267168926492e8f77219ad922257a5be2c5e52e6183ca4dfd0ad3912d7bd1ec968065",
+	},
 }

+ 110 - 0
chacha20poly1305/xchacha20poly1305.go

@@ -0,0 +1,110 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package chacha20poly1305
+
+import (
+	"crypto/cipher"
+	"encoding/binary"
+	"errors"
+
+	"golang.org/x/crypto/internal/chacha20"
+)
+
+const (
+	// NonceSizeX is the size of the nonce used with the XChaCha20-Poly1305
+	// variant of this AEAD, in bytes.
+	NonceSizeX = 24 // Seal/Open feed bytes 0-15 to HChaCha20 and copy bytes 16-23 into the inner 12-byte nonce
+)
+
+type xchacha20poly1305 struct { // AEAD state for the 24-byte-nonce variant; constructed by NewX
+	key [8]uint32 // the 32-byte key, held as eight little-endian 32-bit words
+}
+
+// NewX returns an XChaCha20-Poly1305 AEAD that uses the given 256-bit key.
+//
+// XChaCha20-Poly1305 is a ChaCha20-Poly1305 variant that takes a longer nonce,
+// suitable to be generated randomly without risk of collisions. It should be
+// preferred when nonce uniqueness cannot be trivially ensured, or whenever
+// nonces are randomly generated.
+func NewX(key []byte) (cipher.AEAD, error) {
+	if len(key) != KeySize { // any key that is not exactly KeySize bytes is rejected with an error
+		return nil, errors.New("chacha20poly1305: bad key length")
+	}
+	ret := new(xchacha20poly1305)
+	ret.key[0] = binary.LittleEndian.Uint32(key[0:4]) // unpack the key into eight little-endian words
+	ret.key[1] = binary.LittleEndian.Uint32(key[4:8])
+	ret.key[2] = binary.LittleEndian.Uint32(key[8:12])
+	ret.key[3] = binary.LittleEndian.Uint32(key[12:16])
+	ret.key[4] = binary.LittleEndian.Uint32(key[16:20])
+	ret.key[5] = binary.LittleEndian.Uint32(key[20:24])
+	ret.key[6] = binary.LittleEndian.Uint32(key[24:28])
+	ret.key[7] = binary.LittleEndian.Uint32(key[28:32])
+	return ret, nil
+}
+
+func (*xchacha20poly1305) NonceSize() int { // reports the nonce length Seal and Open require
+	return NonceSizeX // 24 bytes
+}
+
+func (*xchacha20poly1305) Overhead() int { // reports how many bytes longer a ciphertext is than its plaintext
+	return 16 // the same minimum length that Open demands of a ciphertext
+}
+
+func (x *xchacha20poly1305) Seal(dst, nonce, plaintext, additionalData []byte) []byte { // derives a per-nonce subkey, then seals with the 12-byte-nonce implementation
+	if len(nonce) != NonceSizeX { // a wrong nonce length is treated as a programmer error, hence the panic
+		panic("chacha20poly1305: bad nonce length passed to Seal")
+	}
+
+	// XChaCha20-Poly1305 technically supports a 64-bit counter, so there is no
+	// size limit. However, since we reuse the ChaCha20-Poly1305 implementation,
+	// the second half of the counter is not available. This is unlikely to be
+	// an issue because the cipher.AEAD API requires the entire message to be in
+	// memory, and the counter overflows at 256 GiB (2³⁸ bytes).
+	if uint64(len(plaintext)) > (1<<38)-64 { // NOTE(review): presumably 2³² 64-byte blocks minus one block reserved by seal; confirm against chacha20poly1305.seal
+		panic("chacha20poly1305: plaintext too large")
+	}
+
+	hNonce := [4]uint32{ // first 16 nonce bytes as four little-endian words: the HChaCha20 input
+		binary.LittleEndian.Uint32(nonce[0:4]),
+		binary.LittleEndian.Uint32(nonce[4:8]),
+		binary.LittleEndian.Uint32(nonce[8:12]),
+		binary.LittleEndian.Uint32(nonce[12:16]),
+	}
+	c := &chacha20poly1305{ // inner 12-byte-nonce AEAD keyed with the derived subkey
+		key: chacha20.HChaCha20(&x.key, &hNonce),
+	}
+	// The first 4 bytes of the final nonce are unused counter space.
+	cNonce := make([]byte, NonceSize)
+	copy(cNonce[4:12], nonce[16:24]) // last 8 nonce bytes fill cNonce[4:12]; cNonce[0:4] stays zero
+
+	return c.seal(dst, cNonce[:], plaintext, additionalData)
+}
+
+func (x *xchacha20poly1305) Open(dst, nonce, ciphertext, additionalData []byte) ([]byte, error) { // derives the same per-nonce subkey as Seal, then opens with the 12-byte-nonce implementation
+	if len(nonce) != NonceSizeX { // a wrong nonce length is treated as a programmer error, hence the panic
+		panic("chacha20poly1305: bad nonce length passed to Open")
+	}
+	if len(ciphertext) < 16 { // shorter than the 16-byte overhead, so it cannot be a valid sealed message
+		return nil, errOpen
+	}
+	if uint64(len(ciphertext)) > (1<<38)-48 { // Seal's plaintext limit of (1<<38)-64 plus the 16-byte overhead
+		panic("chacha20poly1305: ciphertext too large")
+	}
+
+	hNonce := [4]uint32{ // first 16 nonce bytes as four little-endian words: the HChaCha20 input
+		binary.LittleEndian.Uint32(nonce[0:4]),
+		binary.LittleEndian.Uint32(nonce[4:8]),
+		binary.LittleEndian.Uint32(nonce[8:12]),
+		binary.LittleEndian.Uint32(nonce[12:16]),
+	}
+	c := &chacha20poly1305{ // inner 12-byte-nonce AEAD keyed with the derived subkey
+		key: chacha20.HChaCha20(&x.key, &hNonce),
+	}
+	// The first 4 bytes of the final nonce are unused counter space.
+	cNonce := make([]byte, NonceSize)
+	copy(cNonce[4:12], nonce[16:24]) // last 8 nonce bytes fill cNonce[4:12]; cNonce[0:4] stays zero
+
+	return c.open(dst, cNonce[:], ciphertext, additionalData)
+}

+ 70 - 42
internal/chacha20/chacha_generic.go

@@ -32,6 +32,30 @@ func New(key [8]uint32, nonce [3]uint32) *Cipher {
 	return &Cipher{key: key, nonce: nonce}
 }
 
+// ChaCha20 constants spelling "expand 32-byte k" in ASCII, four characters per little-endian word.
+const (
+	j0 uint32 = 0x61707865 // "expa"
+	j1 uint32 = 0x3320646e // "nd 3"
+	j2 uint32 = 0x79622d32 // "2-by"
+	j3 uint32 = 0x6b206574 // "te k"
+)
+
+func quarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) { // one quarter round: four add/xor/rotate steps over four state words
+	a += b
+	d ^= a
+	d = (d << 16) | (d >> 16) // rotate d left by 16 bits
+	c += d
+	b ^= c
+	b = (b << 12) | (b >> 20) // rotate b left by 12 bits
+	a += b
+	d ^= a
+	d = (d << 8) | (d >> 24) // rotate d left by 8 bits
+	c += d
+	b ^= c
+	b = (b << 7) | (b >> 25) // rotate b left by 7 bits
+	return a, b, c, d // updated words, in the same order as the arguments
+}
+
 // XORKeyStream XORs each byte in the given slice with a byte from the
 // cipher's key stream. Dst and src must overlap entirely or not at all.
 //
@@ -73,6 +97,9 @@ func (s *Cipher) XORKeyStream(dst, src []byte) {
 		return
 	}
 	if haveAsm {
+		if uint64(len(src))+uint64(s.counter)*64 > (1<<38)-64 {
+			panic("chacha20: counter overflow")
+		}
 		s.xorKeyStreamAsm(dst, src)
 		return
 	}
@@ -85,59 +112,34 @@ func (s *Cipher) XORKeyStream(dst, src []byte) {
 		copy(s.buf[len(s.buf)-64:], src[fin:])
 	}
 
-	// qr calculates a quarter round
-	qr := func(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
-		a += b
-		d ^= a
-		d = (d << 16) | (d >> 16)
-		c += d
-		b ^= c
-		b = (b << 12) | (b >> 20)
-		a += b
-		d ^= a
-		d = (d << 8) | (d >> 24)
-		c += d
-		b ^= c
-		b = (b << 7) | (b >> 25)
-		return a, b, c, d
-	}
-
-	// ChaCha20 constants
-	const (
-		j0 = 0x61707865
-		j1 = 0x3320646e
-		j2 = 0x79622d32
-		j3 = 0x6b206574
-	)
-
 	// pre-calculate most of the first round
-	s1, s5, s9, s13 := qr(j1, s.key[1], s.key[5], s.nonce[0])
-	s2, s6, s10, s14 := qr(j2, s.key[2], s.key[6], s.nonce[1])
-	s3, s7, s11, s15 := qr(j3, s.key[3], s.key[7], s.nonce[2])
+	s1, s5, s9, s13 := quarterRound(j1, s.key[1], s.key[5], s.nonce[0])
+	s2, s6, s10, s14 := quarterRound(j2, s.key[2], s.key[6], s.nonce[1])
+	s3, s7, s11, s15 := quarterRound(j3, s.key[3], s.key[7], s.nonce[2])
 
 	n := len(src)
 	src, dst = src[:n:n], dst[:n:n] // BCE hint
 	for i := 0; i < n; i += 64 {
 		// calculate the remainder of the first round
-		s0, s4, s8, s12 := qr(j0, s.key[0], s.key[4], s.counter)
+		s0, s4, s8, s12 := quarterRound(j0, s.key[0], s.key[4], s.counter)
 
 		// execute the second round
-		x0, x5, x10, x15 := qr(s0, s5, s10, s15)
-		x1, x6, x11, x12 := qr(s1, s6, s11, s12)
-		x2, x7, x8, x13 := qr(s2, s7, s8, s13)
-		x3, x4, x9, x14 := qr(s3, s4, s9, s14)
+		x0, x5, x10, x15 := quarterRound(s0, s5, s10, s15)
+		x1, x6, x11, x12 := quarterRound(s1, s6, s11, s12)
+		x2, x7, x8, x13 := quarterRound(s2, s7, s8, s13)
+		x3, x4, x9, x14 := quarterRound(s3, s4, s9, s14)
 
 		// execute the remaining 18 rounds
 		for i := 0; i < 9; i++ {
-			x0, x4, x8, x12 = qr(x0, x4, x8, x12)
-			x1, x5, x9, x13 = qr(x1, x5, x9, x13)
-			x2, x6, x10, x14 = qr(x2, x6, x10, x14)
-			x3, x7, x11, x15 = qr(x3, x7, x11, x15)
-
-			x0, x5, x10, x15 = qr(x0, x5, x10, x15)
-			x1, x6, x11, x12 = qr(x1, x6, x11, x12)
-			x2, x7, x8, x13 = qr(x2, x7, x8, x13)
-			x3, x4, x9, x14 = qr(x3, x4, x9, x14)
+			x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
+			x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
+			x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
+			x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
+
+			x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
+			x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
+			x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
+			x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
 		}
 
 		x0 += j0
@@ -234,3 +236,29 @@ func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
 	}
 	s.XORKeyStream(out, in)
 }
+
+// HChaCha20 uses the ChaCha20 core to generate a derived key from a key and a
+// nonce. It should only be used as part of the XChaCha20 construction.
+func HChaCha20(key *[8]uint32, nonce *[4]uint32) [8]uint32 {
+	x0, x1, x2, x3 := j0, j1, j2, j3 // words 0-3: the fixed constants
+	x4, x5, x6, x7 := key[0], key[1], key[2], key[3] // words 4-11: the key
+	x8, x9, x10, x11 := key[4], key[5], key[6], key[7]
+	x12, x13, x14, x15 := nonce[0], nonce[1], nonce[2], nonce[3] // words 12-15: the 16-byte nonce (no block counter here)
+
+	for i := 0; i < 10; i++ { // 10 iterations of two rounds each = 20 rounds
+		x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12) // first round: the four columns of the 4x4 word state
+		x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
+		x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
+		x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
+
+		x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15) // second round: the four diagonals
+		x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
+		x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
+		x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
+	}
+
+	var out [8]uint32
+	out[0], out[1], out[2], out[3] = x0, x1, x2, x3 // the result is words 0-3 and 12-15 of the final state
+	out[4], out[5], out[6], out[7] = x12, x13, x14, x15 // the initial state is not added back before output
+	return out
+}

+ 0 - 0
internal/chacha20/asm_s390x.s → internal/chacha20/chacha_s390x.s


+ 37 - 0
internal/chacha20/chacha_test.go

@@ -5,6 +5,7 @@
 package chacha20
 
 import (
+	"encoding/binary"
 	"encoding/hex"
 	"fmt"
 	"math/rand"
@@ -186,3 +187,39 @@ func BenchmarkChaCha20(b *testing.B) {
 		})
 	}
 }
+
+func TestHChaCha20(t *testing.T) { // checks HChaCha20 against one fixed key/nonce/output vector
+	// See draft-paragon-paseto-rfc-00 §7.2.1.
+	key := []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+		0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}
+	nonce := []byte{0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a,
+		0x00, 0x00, 0x00, 0x00, 0x31, 0x41, 0x59, 0x27}
+	expected := []byte{0x82, 0x41, 0x3b, 0x42, 0x27, 0xb2, 0x7b, 0xfe,
+		0xd3, 0x0e, 0x42, 0x50, 0x8a, 0x87, 0x7d, 0x73,
+		0xa0, 0xf9, 0xe4, 0xd5, 0x8a, 0x74, 0xa8, 0x53,
+		0xc1, 0x2e, 0xc4, 0x13, 0x26, 0xd3, 0xec, 0xdc,
+	}
+	result := HChaCha20(&[8]uint32{ // HChaCha20 takes words, so the byte vectors are decoded little-endian first
+		binary.LittleEndian.Uint32(key[0:4]),
+		binary.LittleEndian.Uint32(key[4:8]),
+		binary.LittleEndian.Uint32(key[8:12]),
+		binary.LittleEndian.Uint32(key[12:16]),
+		binary.LittleEndian.Uint32(key[16:20]),
+		binary.LittleEndian.Uint32(key[20:24]),
+		binary.LittleEndian.Uint32(key[24:28]),
+		binary.LittleEndian.Uint32(key[28:32]),
+	}, &[4]uint32{
+		binary.LittleEndian.Uint32(nonce[0:4]),
+		binary.LittleEndian.Uint32(nonce[4:8]),
+		binary.LittleEndian.Uint32(nonce[8:12]),
+		binary.LittleEndian.Uint32(nonce[12:16]),
+	})
+	for i := 0; i < 8; i++ { // compare word by word so a failure names the offending word
+		want := binary.LittleEndian.Uint32(expected[i*4 : (i+1)*4])
+		if got := result[i]; got != want {
+			t.Errorf("word %d incorrect: want 0x%x, got 0x%x", i, want, got)
+		}
+	}
+}