Explorar el Código

Add an implementation of murmurhash for appengine

App Engine can't use unsafe, so it can't use the existing murmur
implementation. Add an implementation that does not use unsafe, guarded
by the appengine build tag.

fixes #457
Chris Bannister hace 10 años
padre
commit
0c3594747e
Se han modificado 5 ficheros con 346 adiciones y 197 borrados
  1. 139 0
      murmur.go
  2. 137 0
      murmur_appengine.go
  3. 70 0
      murmur_test.go
  4. 0 133
      token.go
  5. 0 64
      token_test.go

+ 139 - 0
murmur.go

@@ -0,0 +1,139 @@
+// +build !appengine
+
+package gocql
+
+import (
+	"unsafe"
+)
+
+// murmur3H1 computes the murmur3 128-bit hash of data, starting from zeroed
+// h1/h2 state, and returns only the first 64-bit half (h1).
+//
+// Full 16-byte blocks are read by reinterpreting the slice memory as
+// [2]uint64 through unsafe.Pointer, which is why this file is excluded from
+// appengine builds.
+//
+// NOTE(review): the unsafe cast loads each word in the host's byte order, so
+// the result only matches a little-endian decoding of the input on
+// little-endian CPUs, and it relies on the platform tolerating loads that
+// may not be 8-byte aligned. Confirm for every supported architecture.
+func murmur3H1(data []byte) uint64 {
+	length := len(data)
+
+	// h1/h2 are the two halves of the hash state; k1/k2 hold the words of
+	// the block currently being mixed in. All four start at zero.
+	var h1, h2, k1, k2 uint64
+
+	// multiplication constants used when mixing k1 and k2
+	const (
+		c1 = 0x87c37b91114253d5
+		c2 = 0x4cf5ad432745937f
+	)
+
+	// body: consume the input one full 16-byte block at a time
+	nBlocks := length / 16
+	for i := 0; i < nBlocks; i++ {
+		// view the 16 bytes starting at data[i*16] as two uint64 words
+		block := (*[2]uint64)(unsafe.Pointer(&data[i*16]))
+
+		k1 = block[0]
+		k2 = block[1]
+
+		// mix k1 and fold it into h1
+		k1 *= c1
+		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
+		k1 *= c2
+		h1 ^= k1
+
+		h1 = (h1 << 27) | (h1 >> 37) // ROTL64(h1, 27)
+		h1 += h2
+		h1 = h1*5 + 0x52dce729
+
+		// mix k2 and fold it into h2; h2 uses the h1 value updated above
+		k2 *= c2
+		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
+		k2 *= c1
+		h2 ^= k2
+
+		h2 = (h2 << 31) | (h2 >> 33) // ROTL64(h2, 31)
+		h2 += h1
+		h2 = h2*5 + 0x38495ab5
+	}
+
+	// tail: the 0-15 bytes left after the last full block. The switch enters
+	// at the number of leftover bytes and falls through to case 1, so every
+	// leftover byte is picked up exactly once. tail[i] is placed at bit
+	// offset 8*(i%8): bytes 8-14 build k2 and bytes 0-7 build k1. When no
+	// bytes are left over, no case matches and the tail is skipped.
+	tail := data[nBlocks*16:]
+	k1 = 0
+	k2 = 0
+	switch length & 15 {
+	case 15:
+		k2 ^= uint64(tail[14]) << 48
+		fallthrough
+	case 14:
+		k2 ^= uint64(tail[13]) << 40
+		fallthrough
+	case 13:
+		k2 ^= uint64(tail[12]) << 32
+		fallthrough
+	case 12:
+		k2 ^= uint64(tail[11]) << 24
+		fallthrough
+	case 11:
+		k2 ^= uint64(tail[10]) << 16
+		fallthrough
+	case 10:
+		k2 ^= uint64(tail[9]) << 8
+		fallthrough
+	case 9:
+		k2 ^= uint64(tail[8])
+
+		// k2 is complete: mix it and fold it into h2
+		k2 *= c2
+		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
+		k2 *= c1
+		h2 ^= k2
+
+		fallthrough
+	case 8:
+		k1 ^= uint64(tail[7]) << 56
+		fallthrough
+	case 7:
+		k1 ^= uint64(tail[6]) << 48
+		fallthrough
+	case 6:
+		k1 ^= uint64(tail[5]) << 40
+		fallthrough
+	case 5:
+		k1 ^= uint64(tail[4]) << 32
+		fallthrough
+	case 4:
+		k1 ^= uint64(tail[3]) << 24
+		fallthrough
+	case 3:
+		k1 ^= uint64(tail[2]) << 16
+		fallthrough
+	case 2:
+		k1 ^= uint64(tail[1]) << 8
+		fallthrough
+	case 1:
+		k1 ^= uint64(tail[0])
+
+		// k1 is complete: mix it and fold it into h1
+		k1 *= c1
+		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
+		k1 *= c2
+		h1 ^= k1
+	}
+
+	// fold the input length into both halves of the state
+	h1 ^= uint64(length)
+	h2 ^= uint64(length)
+
+	// cross-mix the two halves before finalizing
+	h1 += h2
+	h2 += h1
+
+	// finalizer: multiplication constants for the fmix64 steps below
+	const (
+		fmix1 = 0xff51afd7ed558ccd
+		fmix2 = 0xc4ceb9fe1a85ec53
+	)
+
+	// fmix64(h1)
+	h1 ^= h1 >> 33
+	h1 *= fmix1
+	h1 ^= h1 >> 33
+	h1 *= fmix2
+	h1 ^= h1 >> 33
+
+	// fmix64(h2)
+	h2 ^= h2 >> 33
+	h2 *= fmix1
+	h2 ^= h2 >> 33
+	h2 *= fmix2
+	h2 ^= h2 >> 33
+
+	h1 += h2
+	// The matching "h2 += h1" step is omitted: h2 is discarded, so it would
+	// have no effect on the returned value.
+
+	return h1
+}

+ 137 - 0
murmur_appengine.go

@@ -0,0 +1,137 @@
+// +build appengine
+
+package gocql
+
+import "encoding/binary"
+
+// murmur3H1 computes the murmur3 128-bit hash of data, starting from zeroed
+// h1/h2 state, and returns only the first 64-bit half (h1).
+//
+// This is the appengine build of the function: each 16-byte block is decoded
+// with encoding/binary as two little-endian uint64 words, so the unsafe
+// package is not needed.
+func murmur3H1(data []byte) uint64 {
+	length := len(data)
+
+	// h1/h2 are the two halves of the hash state; k1/k2 hold the words of
+	// the block currently being mixed in. All four start at zero.
+	var h1, h2, k1, k2 uint64
+
+	// multiplication constants used when mixing k1 and k2
+	const (
+		c1 = 0x87c37b91114253d5
+		c2 = 0x4cf5ad432745937f
+	)
+
+	// body: consume the input one full 16-byte block at a time
+	nBlocks := length / 16
+	for i := 0; i < nBlocks; i++ {
+		// Decode the block's two 64-bit words as little-endian values
+		// instead of casting the bytes through unsafe.Pointer.
+
+		k1 = binary.LittleEndian.Uint64(data[i*16:])
+		k2 = binary.LittleEndian.Uint64(data[(i*16)+8:])
+
+		// mix k1 and fold it into h1
+		k1 *= c1
+		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
+		k1 *= c2
+		h1 ^= k1
+
+		h1 = (h1 << 27) | (h1 >> 37) // ROTL64(h1, 27)
+		h1 += h2
+		h1 = h1*5 + 0x52dce729
+
+		// mix k2 and fold it into h2; h2 uses the h1 value updated above
+		k2 *= c2
+		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
+		k2 *= c1
+		h2 ^= k2
+
+		h2 = (h2 << 31) | (h2 >> 33) // ROTL64(h2, 31)
+		h2 += h1
+		h2 = h2*5 + 0x38495ab5
+	}
+
+	// tail: the 0-15 bytes left after the last full block. The switch enters
+	// at the number of leftover bytes and falls through to case 1, so every
+	// leftover byte is picked up exactly once. tail[i] is placed at bit
+	// offset 8*(i%8): bytes 8-14 build k2 and bytes 0-7 build k1. When no
+	// bytes are left over, no case matches and the tail is skipped.
+	tail := data[nBlocks*16:]
+	k1 = 0
+	k2 = 0
+	switch length & 15 {
+	case 15:
+		k2 ^= uint64(tail[14]) << 48
+		fallthrough
+	case 14:
+		k2 ^= uint64(tail[13]) << 40
+		fallthrough
+	case 13:
+		k2 ^= uint64(tail[12]) << 32
+		fallthrough
+	case 12:
+		k2 ^= uint64(tail[11]) << 24
+		fallthrough
+	case 11:
+		k2 ^= uint64(tail[10]) << 16
+		fallthrough
+	case 10:
+		k2 ^= uint64(tail[9]) << 8
+		fallthrough
+	case 9:
+		k2 ^= uint64(tail[8])
+
+		// k2 is complete: mix it and fold it into h2
+		k2 *= c2
+		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
+		k2 *= c1
+		h2 ^= k2
+
+		fallthrough
+	case 8:
+		k1 ^= uint64(tail[7]) << 56
+		fallthrough
+	case 7:
+		k1 ^= uint64(tail[6]) << 48
+		fallthrough
+	case 6:
+		k1 ^= uint64(tail[5]) << 40
+		fallthrough
+	case 5:
+		k1 ^= uint64(tail[4]) << 32
+		fallthrough
+	case 4:
+		k1 ^= uint64(tail[3]) << 24
+		fallthrough
+	case 3:
+		k1 ^= uint64(tail[2]) << 16
+		fallthrough
+	case 2:
+		k1 ^= uint64(tail[1]) << 8
+		fallthrough
+	case 1:
+		k1 ^= uint64(tail[0])
+
+		// k1 is complete: mix it and fold it into h1
+		k1 *= c1
+		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
+		k1 *= c2
+		h1 ^= k1
+	}
+
+	// fold the input length into both halves of the state
+	h1 ^= uint64(length)
+	h2 ^= uint64(length)
+
+	// cross-mix the two halves before finalizing
+	h1 += h2
+	h2 += h1
+
+	// finalizer: multiplication constants for the fmix64 steps below
+	const (
+		fmix1 = 0xff51afd7ed558ccd
+		fmix2 = 0xc4ceb9fe1a85ec53
+	)
+
+	// fmix64(h1)
+	h1 ^= h1 >> 33
+	h1 *= fmix1
+	h1 ^= h1 >> 33
+	h1 *= fmix2
+	h1 ^= h1 >> 33
+
+	// fmix64(h2)
+	h2 ^= h2 >> 33
+	h2 *= fmix1
+	h2 ^= h2 >> 33
+	h2 *= fmix2
+	h2 ^= h2 >> 33
+
+	h1 += h2
+	// The matching "h2 += h1" step is omitted: h2 is discarded, so it would
+	// have no effect on the returned value.
+
+	return h1
+}

+ 70 - 0
murmur_test.go

@@ -0,0 +1,70 @@
+package gocql
+
+import (
+	"strconv"
+	"testing"
+)
+
+// TestMurmur3H1 checks murmur3H1 against known-good hash values.
+func TestMurmur3H1(t *testing.T) {
+	// Expected hashes for a sample string that grows by one digit per entry
+	// (the digit is the entry index modulo 10). The values were generated by
+	// the java datastax murmur3 implementation. Twenty inputs of lengths 0
+	// through 19 exercise every tail-length branch of the algorithm.
+	seriesExpected := [...]uint64{
+		0x0000000000000000, // ""
+		0x2ac9debed546a380, // "0"
+		0x649e4eaa7fc1708e, // "01"
+		0xce68f60d7c353bdb, // "012"
+		0x0f95757ce7f38254, // "0123"
+		0x0f04e459497f3fc1, // "01234"
+		0x88c0a92586be0a27, // "012345"
+		0x13eb9fb82606f7a6, // "0123456"
+		0x8236039b7387354d, // "01234567"
+		0x4c1e87519fe738ba, // "012345678"
+		0x3f9652ac3effeb24, // "0123456789"
+		0x3f33760ded9006c6, // "01234567890"
+		0xaed70a6631854cb1, // "012345678901"
+		0x8a299a8f8e0e2da7, // "0123456789012"
+		0x624b675c779249a6, // "01234567890123"
+		0xa4b203bb1d90b9a3, // "012345678901234"
+		0xa3293ad698ecb99a, // "0123456789012345"
+		0xbc740023dbd50048, // "01234567890123456"
+		0x3fe5ab9837d25cdd, // "012345678901234567"
+		0x2d0338c1ca87d132, // "0123456789012345678"
+	}
+	input := ""
+	for idx := 0; idx < len(seriesExpected); idx++ {
+		assertMurmur3H1(t, []byte(input), seriesExpected[idx])
+
+		input += strconv.Itoa(idx % 10)
+	}
+
+	// Additional examples taken from other driver implementations.
+	fromOtherDrivers := []struct {
+		input string
+		want  uint64
+	}{
+		{"hello", 0xcbd8a7b341bd9b02},
+		{"hello, world", 0x342fac623a5ebc8e},
+		{"19 Jan 2038 at 3:14:07 AM", 0xb89e5988b737affc},
+		{"The quick brown fox jumps over the lazy dog.", 0xcd99481f9ee902c9},
+	}
+	for _, example := range fromOtherDrivers {
+		assertMurmur3H1(t, []byte(example.input), example.want)
+	}
+}
+
+// assertMurmur3H1 hashes data with murmur3H1 and reports a test error when
+// the result differs from expected.
+func assertMurmur3H1(t *testing.T, data []byte, expected uint64) {
+	if got := murmur3H1(data); got != expected {
+		t.Errorf("Expected h1 = %x for data = %x, but was %x", expected, data, got)
+	}
+}
+
+// BenchmarkMurmur3H1 measures hashing a 1024-byte buffer with murmur3H1 and
+// converting the resulting h1 value to a murmur3Token.
+func BenchmarkMurmur3H1(b *testing.B) {
+	var h1 uint64
+	var data [1024]byte
+	for i := range data {
+		data[i] = byte(i)
+	}
+
+	// Reset the timer once, after the input buffer has been prepared.
+	// Resetting it inside the timed loop throws away the time measured for
+	// all earlier iterations, which makes the reported ns/op meaningless.
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		h1 = murmur3H1(data[:])
+		_ = murmur3Token(int64(h1))
+	}
+}

+ 0 - 133
token.go

@@ -12,7 +12,6 @@ import (
 	"sort"
 	"strconv"
 	"strings"
-	"unsafe"
 )
 
 // a token partitioner
@@ -42,138 +41,6 @@ func (p murmur3Partitioner) Hash(partitionKey []byte) token {
 }
 
 // murmur3 little-endian, 128-bit hash, but returns only h1
-func murmur3H1(data []byte) uint64 {
-	length := len(data)
-
-	var h1, h2, k1, k2 uint64
-
-	const (
-		c1 = 0x87c37b91114253d5
-		c2 = 0x4cf5ad432745937f
-	)
-
-	// body
-	nBlocks := length / 16
-	for i := 0; i < nBlocks; i++ {
-		block := (*[2]uint64)(unsafe.Pointer(&data[i*16]))
-
-		k1 = block[0]
-		k2 = block[1]
-
-		k1 *= c1
-		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
-		k1 *= c2
-		h1 ^= k1
-
-		h1 = (h1 << 27) | (h1 >> 37) // ROTL64(h1, 27)
-		h1 += h2
-		h1 = h1*5 + 0x52dce729
-
-		k2 *= c2
-		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
-		k2 *= c1
-		h2 ^= k2
-
-		h2 = (h2 << 31) | (h2 >> 33) // ROTL64(h2, 31)
-		h2 += h1
-		h2 = h2*5 + 0x38495ab5
-	}
-
-	// tail
-	tail := data[nBlocks*16:]
-	k1 = 0
-	k2 = 0
-	switch length & 15 {
-	case 15:
-		k2 ^= uint64(tail[14]) << 48
-		fallthrough
-	case 14:
-		k2 ^= uint64(tail[13]) << 40
-		fallthrough
-	case 13:
-		k2 ^= uint64(tail[12]) << 32
-		fallthrough
-	case 12:
-		k2 ^= uint64(tail[11]) << 24
-		fallthrough
-	case 11:
-		k2 ^= uint64(tail[10]) << 16
-		fallthrough
-	case 10:
-		k2 ^= uint64(tail[9]) << 8
-		fallthrough
-	case 9:
-		k2 ^= uint64(tail[8])
-
-		k2 *= c2
-		k2 = (k2 << 33) | (k2 >> 31) // ROTL64(k2, 33)
-		k2 *= c1
-		h2 ^= k2
-
-		fallthrough
-	case 8:
-		k1 ^= uint64(tail[7]) << 56
-		fallthrough
-	case 7:
-		k1 ^= uint64(tail[6]) << 48
-		fallthrough
-	case 6:
-		k1 ^= uint64(tail[5]) << 40
-		fallthrough
-	case 5:
-		k1 ^= uint64(tail[4]) << 32
-		fallthrough
-	case 4:
-		k1 ^= uint64(tail[3]) << 24
-		fallthrough
-	case 3:
-		k1 ^= uint64(tail[2]) << 16
-		fallthrough
-	case 2:
-		k1 ^= uint64(tail[1]) << 8
-		fallthrough
-	case 1:
-		k1 ^= uint64(tail[0])
-
-		k1 *= c1
-		k1 = (k1 << 31) | (k1 >> 33) // ROTL64(k1, 31)
-		k1 *= c2
-		h1 ^= k1
-	}
-
-	h1 ^= uint64(length)
-	h2 ^= uint64(length)
-
-	h1 += h2
-	h2 += h1
-
-	// finalizer
-	const (
-		fmix1 = 0xff51afd7ed558ccd
-		fmix2 = 0xc4ceb9fe1a85ec53
-	)
-
-	// fmix64(h1)
-	h1 ^= h1 >> 33
-	h1 *= fmix1
-	h1 ^= h1 >> 33
-	h1 *= fmix2
-	h1 ^= h1 >> 33
-
-	// fmix64(h2)
-	h2 ^= h2 >> 33
-	h2 *= fmix1
-	h2 ^= h2 >> 33
-	h2 *= fmix2
-	h2 ^= h2 >> 33
-
-	h1 += h2
-	// the following is extraneous since h2 is discarded
-	// h2 += h1
-
-	return h1
-}
-
 func (p murmur3Partitioner) ParseString(str string) token {
 	val, _ := strconv.ParseInt(str, 10, 64)
 	return murmur3Token(val)

+ 0 - 64
token_test.go

@@ -12,70 +12,6 @@ import (
 	"testing"
 )
 
-// Test the implementation of murmur3
-func TestMurmur3H1(t *testing.T) {
-	// these examples are based on adding a index number to a sample string in
-	// a loop. The expected values were generated by the java datastax murmur3
-	// implementation. The number of examples here of increasing lengths ensure
-	// test coverage of all tail-length branches in the murmur3 algorithm
-	seriesExpected := [...]uint64{
-		0x0000000000000000, // ""
-		0x2ac9debed546a380, // "0"
-		0x649e4eaa7fc1708e, // "01"
-		0xce68f60d7c353bdb, // "012"
-		0x0f95757ce7f38254, // "0123"
-		0x0f04e459497f3fc1, // "01234"
-		0x88c0a92586be0a27, // "012345"
-		0x13eb9fb82606f7a6, // "0123456"
-		0x8236039b7387354d, // "01234567"
-		0x4c1e87519fe738ba, // "012345678"
-		0x3f9652ac3effeb24, // "0123456789"
-		0x3f33760ded9006c6, // "01234567890"
-		0xaed70a6631854cb1, // "012345678901"
-		0x8a299a8f8e0e2da7, // "0123456789012"
-		0x624b675c779249a6, // "01234567890123"
-		0xa4b203bb1d90b9a3, // "012345678901234"
-		0xa3293ad698ecb99a, // "0123456789012345"
-		0xbc740023dbd50048, // "01234567890123456"
-		0x3fe5ab9837d25cdd, // "012345678901234567"
-		0x2d0338c1ca87d132, // "0123456789012345678"
-	}
-	sample := ""
-	for i, expected := range seriesExpected {
-		assertMurmur3H1(t, []byte(sample), expected)
-
-		sample = sample + strconv.Itoa(i%10)
-	}
-
-	// Here are some test examples from other driver implementations
-	assertMurmur3H1(t, []byte("hello"), 0xcbd8a7b341bd9b02)
-	assertMurmur3H1(t, []byte("hello, world"), 0x342fac623a5ebc8e)
-	assertMurmur3H1(t, []byte("19 Jan 2038 at 3:14:07 AM"), 0xb89e5988b737affc)
-	assertMurmur3H1(t, []byte("The quick brown fox jumps over the lazy dog."), 0xcd99481f9ee902c9)
-}
-
-// helper function for testing the murmur3 implementation
-func assertMurmur3H1(t *testing.T, data []byte, expected uint64) {
-	actual := murmur3H1(data)
-	if actual != expected {
-		t.Errorf("Expected h1 = %x for data = %x, but was %x", expected, data, actual)
-	}
-}
-
-// Benchmark of the performance of the murmur3 implementation
-func BenchmarkMurmur3H1(b *testing.B) {
-	var h1 uint64
-	var data [1024]byte
-	for i := 0; i < 1024; i++ {
-		data[i] = byte(i)
-	}
-	for i := 0; i < b.N; i++ {
-		b.ResetTimer()
-		h1 = murmur3H1(data[:])
-		_ = murmur3Token(int64(h1))
-	}
-}
-
 // Tests of the murmur3Patitioner
 func TestMurmur3Partitioner(t *testing.T) {
 	token := murmur3Partitioner{}.ParseString("-1053604476080545076")