Browse Source

Merge pull request #460 from Zariel/support-appengine

Add an implementation of murmurhash for appengine
Chris Bannister 10 years ago
parent
commit
96aad64c6b
5 changed files with 346 additions and 197 deletions
  1. 139 0
      murmur.go
  2. 137 0
      murmur_appengine.go
  3. 70 0
      murmur_test.go
  4. 0 133
      token.go
  5. 0 64
      token_test.go

+ 139 - 0
murmur.go

@@ -0,0 +1,139 @@
+// +build !appengine
+
+package gocql
+
+import (
+	"unsafe"
+)
+
+// murmur3H1 runs the 128-bit murmur3 hash over data, with both state words
+// starting at zero, and returns only the first 64-bit half (h1).
+//
+// This is the non-appengine build: each 16-byte block is read through an
+// unsafe pointer cast, so the two words are loaded in the host's native byte
+// order.
+// NOTE(review): that agrees with the explicit little-endian decoding in the
+// appengine build only on little-endian hosts - confirm big-endian targets
+// are out of scope.
+func murmur3H1(data []byte) uint64 {
+	length := len(data)
+
+	var h1, h2, k1, k2 uint64
+
+	const (
+		c1 = 0x87c37b91114253d5
+		c2 = 0x4cf5ad432745937f
+	)
+
+	// body: consume the input 16 bytes at a time, mixing the first word of
+	// each block into h1 and the second into h2
+	nBlocks := length / 16
+	for i := 0; i < nBlocks; i++ {
+		// reinterpret the next 16 bytes as two uint64 words without copying
+		block := (*[2]uint64)(unsafe.Pointer(&data[i*16]))
+
+		k1 = block[0]
+		k2 = block[1]
+
+		k1 *= c1
+		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
+		k1 *= c2
+		h1 ^= k1
+
+		h1 = (h1 << 27) | (h1 >> 37) // ROTL64(h1, 27)
+		h1 += h2
+		h1 = h1*5 + 0x52dce729
+
+		k2 *= c2
+		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
+		k2 *= c1
+		h2 ^= k2
+
+		h2 = (h2 << 31) | (h2 >> 33) // ROTL64(h2, 31)
+		h2 += h1
+		h2 = h2*5 + 0x38495ab5
+	}
+
+	// tail: fold the trailing length&15 bytes into k1 (bytes 0-7) and k2
+	// (bytes 8-14); every case falls through so the lower-indexed bytes are
+	// picked up as well
+	tail := data[nBlocks*16:]
+	k1 = 0
+	k2 = 0
+	switch length & 15 {
+	case 15:
+		k2 ^= uint64(tail[14]) << 48
+		fallthrough
+	case 14:
+		k2 ^= uint64(tail[13]) << 40
+		fallthrough
+	case 13:
+		k2 ^= uint64(tail[12]) << 32
+		fallthrough
+	case 12:
+		k2 ^= uint64(tail[11]) << 24
+		fallthrough
+	case 11:
+		k2 ^= uint64(tail[10]) << 16
+		fallthrough
+	case 10:
+		k2 ^= uint64(tail[9]) << 8
+		fallthrough
+	case 9:
+		k2 ^= uint64(tail[8])
+
+		k2 *= c2
+		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
+		k2 *= c1
+		h2 ^= k2
+
+		fallthrough
+	case 8:
+		k1 ^= uint64(tail[7]) << 56
+		fallthrough
+	case 7:
+		k1 ^= uint64(tail[6]) << 48
+		fallthrough
+	case 6:
+		k1 ^= uint64(tail[5]) << 40
+		fallthrough
+	case 5:
+		k1 ^= uint64(tail[4]) << 32
+		fallthrough
+	case 4:
+		k1 ^= uint64(tail[3]) << 24
+		fallthrough
+	case 3:
+		k1 ^= uint64(tail[2]) << 16
+		fallthrough
+	case 2:
+		k1 ^= uint64(tail[1]) << 8
+		fallthrough
+	case 1:
+		k1 ^= uint64(tail[0])
+
+		k1 *= c1
+		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
+		k1 *= c2
+		h1 ^= k1
+	}
+
+	// mix the total input length into both halves, then cross-add them
+	h1 ^= uint64(length)
+	h2 ^= uint64(length)
+
+	h1 += h2
+	h2 += h1
+
+	// finalizer
+	const (
+		fmix1 = 0xff51afd7ed558ccd
+		fmix2 = 0xc4ceb9fe1a85ec53
+	)
+
+	// fmix64(h1)
+	h1 ^= h1 >> 33
+	h1 *= fmix1
+	h1 ^= h1 >> 33
+	h1 *= fmix2
+	h1 ^= h1 >> 33
+
+	// fmix64(h2)
+	h2 ^= h2 >> 33
+	h2 *= fmix1
+	h2 ^= h2 >> 33
+	h2 *= fmix2
+	h2 ^= h2 >> 33
+
+	h1 += h2
+	// the following is extraneous since h2 is discarded
+	// h2 += h1
+
+	return h1
+}

+ 137 - 0
murmur_appengine.go

@@ -0,0 +1,137 @@
+// +build appengine
+
+package gocql
+
+import "encoding/binary"
+
+// murmur3H1 runs the 128-bit murmur3 hash over data, with both state words
+// starting at zero, and returns only the first 64-bit half (h1).
+//
+// This is the appengine build: it decodes each block with encoding/binary
+// instead of an unsafe pointer cast (presumably because package unsafe is
+// not usable on App Engine - confirm).
+func murmur3H1(data []byte) uint64 {
+	length := len(data)
+
+	var h1, h2, k1, k2 uint64
+
+	const (
+		c1 = 0x87c37b91114253d5
+		c2 = 0x4cf5ad432745937f
+	)
+
+	// body: consume the input 16 bytes at a time, mixing the first word of
+	// each block into h1 and the second into h2
+	nBlocks := length / 16
+	for i := 0; i < nBlocks; i++ {
+		// decode the two 8-byte words explicitly as little-endian; this
+		// replaces the (*[2]uint64)(unsafe.Pointer(&data[i*16])) cast
+
+		k1 = binary.LittleEndian.Uint64(data[i*16:])
+		k2 = binary.LittleEndian.Uint64(data[(i*16)+8:])
+
+		k1 *= c1
+		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
+		k1 *= c2
+		h1 ^= k1
+
+		h1 = (h1 << 27) | (h1 >> 37) // ROTL64(h1, 27)
+		h1 += h2
+		h1 = h1*5 + 0x52dce729
+
+		k2 *= c2
+		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
+		k2 *= c1
+		h2 ^= k2
+
+		h2 = (h2 << 31) | (h2 >> 33) // ROTL64(h2, 31)
+		h2 += h1
+		h2 = h2*5 + 0x38495ab5
+	}
+
+	// tail: fold the trailing length&15 bytes into k1 (bytes 0-7) and k2
+	// (bytes 8-14); every case falls through so the lower-indexed bytes are
+	// picked up as well
+	tail := data[nBlocks*16:]
+	k1 = 0
+	k2 = 0
+	switch length & 15 {
+	case 15:
+		k2 ^= uint64(tail[14]) << 48
+		fallthrough
+	case 14:
+		k2 ^= uint64(tail[13]) << 40
+		fallthrough
+	case 13:
+		k2 ^= uint64(tail[12]) << 32
+		fallthrough
+	case 12:
+		k2 ^= uint64(tail[11]) << 24
+		fallthrough
+	case 11:
+		k2 ^= uint64(tail[10]) << 16
+		fallthrough
+	case 10:
+		k2 ^= uint64(tail[9]) << 8
+		fallthrough
+	case 9:
+		k2 ^= uint64(tail[8])
+
+		k2 *= c2
+		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
+		k2 *= c1
+		h2 ^= k2
+
+		fallthrough
+	case 8:
+		k1 ^= uint64(tail[7]) << 56
+		fallthrough
+	case 7:
+		k1 ^= uint64(tail[6]) << 48
+		fallthrough
+	case 6:
+		k1 ^= uint64(tail[5]) << 40
+		fallthrough
+	case 5:
+		k1 ^= uint64(tail[4]) << 32
+		fallthrough
+	case 4:
+		k1 ^= uint64(tail[3]) << 24
+		fallthrough
+	case 3:
+		k1 ^= uint64(tail[2]) << 16
+		fallthrough
+	case 2:
+		k1 ^= uint64(tail[1]) << 8
+		fallthrough
+	case 1:
+		k1 ^= uint64(tail[0])
+
+		k1 *= c1
+		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
+		k1 *= c2
+		h1 ^= k1
+	}
+
+	// mix the total input length into both halves, then cross-add them
+	h1 ^= uint64(length)
+	h2 ^= uint64(length)
+
+	h1 += h2
+	h2 += h1
+
+	// finalizer
+	const (
+		fmix1 = 0xff51afd7ed558ccd
+		fmix2 = 0xc4ceb9fe1a85ec53
+	)
+
+	// fmix64(h1)
+	h1 ^= h1 >> 33
+	h1 *= fmix1
+	h1 ^= h1 >> 33
+	h1 *= fmix2
+	h1 ^= h1 >> 33
+
+	// fmix64(h2)
+	h2 ^= h2 >> 33
+	h2 *= fmix1
+	h2 ^= h2 >> 33
+	h2 *= fmix2
+	h2 ^= h2 >> 33
+
+	h1 += h2
+	// the following is extraneous since h2 is discarded
+	// h2 += h1
+
+	return h1
+}

+ 70 - 0
murmur_test.go

@@ -0,0 +1,70 @@
+package gocql
+
+import (
+	"strconv"
+	"testing"
+)
+
+// TestMurmur3H1 checks murmur3H1 against known-good hash values.
+func TestMurmur3H1(t *testing.T) {
+	// Entry n is the expected hash of the first n characters of the repeating
+	// digit string "0123456789...". The expected values were generated by the
+	// java datastax murmur3 implementation. Running the lengths from 0 to 19
+	// ensures test coverage of all tail-length branches in the murmur3
+	// algorithm.
+	wantByLength := [...]uint64{
+		0x0000000000000000, // ""
+		0x2ac9debed546a380, // "0"
+		0x649e4eaa7fc1708e, // "01"
+		0xce68f60d7c353bdb, // "012"
+		0x0f95757ce7f38254, // "0123"
+		0x0f04e459497f3fc1, // "01234"
+		0x88c0a92586be0a27, // "012345"
+		0x13eb9fb82606f7a6, // "0123456"
+		0x8236039b7387354d, // "01234567"
+		0x4c1e87519fe738ba, // "012345678"
+		0x3f9652ac3effeb24, // "0123456789"
+		0x3f33760ded9006c6, // "01234567890"
+		0xaed70a6631854cb1, // "012345678901"
+		0x8a299a8f8e0e2da7, // "0123456789012"
+		0x624b675c779249a6, // "01234567890123"
+		0xa4b203bb1d90b9a3, // "012345678901234"
+		0xa3293ad698ecb99a, // "0123456789012345"
+		0xbc740023dbd50048, // "01234567890123456"
+		0x3fe5ab9837d25cdd, // "012345678901234567"
+		0x2d0338c1ca87d132, // "0123456789012345678"
+	}
+	input := ""
+	for n, want := range wantByLength {
+		assertMurmur3H1(t, []byte(input), want)
+
+		// grow the input by one digit for the next entry
+		input += strconv.Itoa(n % 10)
+	}
+
+	// Here are some test examples from other driver implementations
+	otherDrivers := []struct {
+		input    string
+		expected uint64
+	}{
+		{"hello", 0xcbd8a7b341bd9b02},
+		{"hello, world", 0x342fac623a5ebc8e},
+		{"19 Jan 2038 at 3:14:07 AM", 0xb89e5988b737affc},
+		{"The quick brown fox jumps over the lazy dog.", 0xcd99481f9ee902c9},
+	}
+	for _, tc := range otherDrivers {
+		assertMurmur3H1(t, []byte(tc.input), tc.expected)
+	}
+}
+
+// assertMurmur3H1 hashes data with murmur3H1 and reports a test error when
+// the result differs from expected.
+func assertMurmur3H1(t *testing.T, data []byte, expected uint64) {
+	if got := murmur3H1(data); got != expected {
+		t.Errorf("Expected h1 = %x for data = %x, but was %x", expected, data, got)
+	}
+}
+
+// BenchmarkMurmur3H1 measures hashing a 1 KiB buffer with murmur3H1 and
+// converting the result to a murmur3Token.
+func BenchmarkMurmur3H1(b *testing.B) {
+	var h1 uint64
+	var data [1024]byte
+	for i := 0; i < 1024; i++ {
+		data[i] = byte(i)
+	}
+	// Reset once, after setup. Resetting inside the loop discards the time of
+	// all but the final iteration, so the reported ns/op would be meaningless.
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		h1 = murmur3H1(data[:])
+		_ = murmur3Token(int64(h1))
+	}
+}

+ 0 - 133
token.go

@@ -12,7 +12,6 @@ import (
 	"sort"
 	"strconv"
 	"strings"
-	"unsafe"
 )
 
 // a token partitioner
@@ -42,138 +41,6 @@ func (p murmur3Partitioner) Hash(partitionKey []byte) token {
 }
 
 // murmur3 little-endian, 128-bit hash, but returns only h1
-func murmur3H1(data []byte) uint64 {
-	length := len(data)
-
-	var h1, h2, k1, k2 uint64
-
-	const (
-		c1 = 0x87c37b91114253d5
-		c2 = 0x4cf5ad432745937f
-	)
-
-	// body
-	nBlocks := length / 16
-	for i := 0; i < nBlocks; i++ {
-		block := (*[2]uint64)(unsafe.Pointer(&data[i*16]))
-
-		k1 = block[0]
-		k2 = block[1]
-
-		k1 *= c1
-		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
-		k1 *= c2
-		h1 ^= k1
-
-		h1 = (h1 << 27) | (h1 >> 37) // ROTL64(h1, 27)
-		h1 += h2
-		h1 = h1*5 + 0x52dce729
-
-		k2 *= c2
-		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
-		k2 *= c1
-		h2 ^= k2
-
-		h2 = (h2 << 31) | (h2 >> 33) // ROTL64(h2, 31)
-		h2 += h1
-		h2 = h2*5 + 0x38495ab5
-	}
-
-	// tail
-	tail := data[nBlocks*16:]
-	k1 = 0
-	k2 = 0
-	switch length & 15 {
-	case 15:
-		k2 ^= uint64(tail[14]) << 48
-		fallthrough
-	case 14:
-		k2 ^= uint64(tail[13]) << 40
-		fallthrough
-	case 13:
-		k2 ^= uint64(tail[12]) << 32
-		fallthrough
-	case 12:
-		k2 ^= uint64(tail[11]) << 24
-		fallthrough
-	case 11:
-		k2 ^= uint64(tail[10]) << 16
-		fallthrough
-	case 10:
-		k2 ^= uint64(tail[9]) << 8
-		fallthrough
-	case 9:
-		k2 ^= uint64(tail[8])
-
-		k2 *= c2
-		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
-		k2 *= c1
-		h2 ^= k2
-
-		fallthrough
-	case 8:
-		k1 ^= uint64(tail[7]) << 56
-		fallthrough
-	case 7:
-		k1 ^= uint64(tail[6]) << 48
-		fallthrough
-	case 6:
-		k1 ^= uint64(tail[5]) << 40
-		fallthrough
-	case 5:
-		k1 ^= uint64(tail[4]) << 32
-		fallthrough
-	case 4:
-		k1 ^= uint64(tail[3]) << 24
-		fallthrough
-	case 3:
-		k1 ^= uint64(tail[2]) << 16
-		fallthrough
-	case 2:
-		k1 ^= uint64(tail[1]) << 8
-		fallthrough
-	case 1:
-		k1 ^= uint64(tail[0])
-
-		k1 *= c1
-		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
-		k1 *= c2
-		h1 ^= k1
-	}
-
-	h1 ^= uint64(length)
-	h2 ^= uint64(length)
-
-	h1 += h2
-	h2 += h1
-
-	// finalizer
-	const (
-		fmix1 = 0xff51afd7ed558ccd
-		fmix2 = 0xc4ceb9fe1a85ec53
-	)
-
-	// fmix64(h1)
-	h1 ^= h1 >> 33
-	h1 *= fmix1
-	h1 ^= h1 >> 33
-	h1 *= fmix2
-	h1 ^= h1 >> 33
-
-	// fmix64(h2)
-	h2 ^= h2 >> 33
-	h2 *= fmix1
-	h2 ^= h2 >> 33
-	h2 *= fmix2
-	h2 ^= h2 >> 33
-
-	h1 += h2
-	// the following is extraneous since h2 is discarded
-	// h2 += h1
-
-	return h1
-}
-
 func (p murmur3Partitioner) ParseString(str string) token {
 	val, _ := strconv.ParseInt(str, 10, 64)
 	return murmur3Token(val)

+ 0 - 64
token_test.go

@@ -12,70 +12,6 @@ import (
 	"testing"
 )
 
-// Test the implementation of murmur3
-func TestMurmur3H1(t *testing.T) {
-	// these examples are based on adding a index number to a sample string in
-	// a loop. The expected values were generated by the java datastax murmur3
-	// implementation. The number of examples here of increasing lengths ensure
-	// test coverage of all tail-length branches in the murmur3 algorithm
-	seriesExpected := [...]uint64{
-		0x0000000000000000, // ""
-		0x2ac9debed546a380, // "0"
-		0x649e4eaa7fc1708e, // "01"
-		0xce68f60d7c353bdb, // "012"
-		0x0f95757ce7f38254, // "0123"
-		0x0f04e459497f3fc1, // "01234"
-		0x88c0a92586be0a27, // "012345"
-		0x13eb9fb82606f7a6, // "0123456"
-		0x8236039b7387354d, // "01234567"
-		0x4c1e87519fe738ba, // "012345678"
-		0x3f9652ac3effeb24, // "0123456789"
-		0x3f33760ded9006c6, // "01234567890"
-		0xaed70a6631854cb1, // "012345678901"
-		0x8a299a8f8e0e2da7, // "0123456789012"
-		0x624b675c779249a6, // "01234567890123"
-		0xa4b203bb1d90b9a3, // "012345678901234"
-		0xa3293ad698ecb99a, // "0123456789012345"
-		0xbc740023dbd50048, // "01234567890123456"
-		0x3fe5ab9837d25cdd, // "012345678901234567"
-		0x2d0338c1ca87d132, // "0123456789012345678"
-	}
-	sample := ""
-	for i, expected := range seriesExpected {
-		assertMurmur3H1(t, []byte(sample), expected)
-
-		sample = sample + strconv.Itoa(i%10)
-	}
-
-	// Here are some test examples from other driver implementations
-	assertMurmur3H1(t, []byte("hello"), 0xcbd8a7b341bd9b02)
-	assertMurmur3H1(t, []byte("hello, world"), 0x342fac623a5ebc8e)
-	assertMurmur3H1(t, []byte("19 Jan 2038 at 3:14:07 AM"), 0xb89e5988b737affc)
-	assertMurmur3H1(t, []byte("The quick brown fox jumps over the lazy dog."), 0xcd99481f9ee902c9)
-}
-
-// helper function for testing the murmur3 implementation
-func assertMurmur3H1(t *testing.T, data []byte, expected uint64) {
-	actual := murmur3H1(data)
-	if actual != expected {
-		t.Errorf("Expected h1 = %x for data = %x, but was %x", expected, data, actual)
-	}
-}
-
-// Benchmark of the performance of the murmur3 implementation
-func BenchmarkMurmur3H1(b *testing.B) {
-	var h1 uint64
-	var data [1024]byte
-	for i := 0; i < 1024; i++ {
-		data[i] = byte(i)
-	}
-	for i := 0; i < b.N; i++ {
-		b.ResetTimer()
-		h1 = murmur3H1(data[:])
-		_ = murmur3Token(int64(h1))
-	}
-}
-
 // Tests of the murmur3Partitioner
 func TestMurmur3Partitioner(t *testing.T) {
 	token := murmur3Partitioner{}.ParseString("-1053604476080545076")