From bea4366cf287430b554e93e65edc58ea4cfe812e Mon Sep 17 00:00:00 2001 From: William Dillon Date: Tue, 27 May 2025 20:03:25 -0400 Subject: [PATCH] fixing some bugs, implementing the Bytes() methods and adding Mishmash128 --- README.md | 4 +- mishmash.go | 100 +++++++++++++++++++++++++++++++++++++++++++---- mishmash_test.go | 44 +++++++++++++++++++++ 3 files changed, 140 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 9ac72cd..17c5d87 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ # mishmash -package mishmash implements the mishmash hash from Pete McNeil's [CodeDweller](https://code.microneil.com/madscientist/CodeDweller) using Go's hash.Hash32 and hash.Hash64 interfaces for simple idiomatic use in other Go programs. \ No newline at end of file +package mishmash implements the mishmash hash from Pete McNeil's [CodeDweller](https://code.microneil.com/madscientist/CodeDweller) using Go's hash.Hash32 and hash.Hash64 interfaces for simple idiomatic use in other Go programs. + +It includes some tests to ensure the hashes produced by this package are the same as the version of CodeDweller linked above. \ No newline at end of file diff --git a/mishmash.go b/mishmash.go index f64560d..cc7707b 100644 --- a/mishmash.go +++ b/mishmash.go @@ -9,16 +9,19 @@ import ( ) const ( - UINT32_RESULTS_MASK uint64 = 0x00000000FFFFFFFF - NUM_PRIMES = 0x100 + UINT32_RESULTS_MASK uint64 = 0x00000000FFFFFFFF // used to mask the uint64 accumulator used by Mishmash + NUM_PRIMES = 0x100 // sizeof PrimeSet + + MISHMASH128_DEFAULT_LEFT, MISHMASH128_DEFAULT_RIGHT uint64 = 0, 0x8888888888888888 // default accumulator values used by 128-bit Mishmash ) -type PrimesSet [NUM_PRIMES]uint32 +type PrimesSet [NUM_PRIMES]uint32 // typedef for an array of uint32 prime numbers func (p *PrimesSet) Select(n uint64) uint32 { return (*p)[n&(NUM_PRIMES-1)] } +// allows loading a primeset from a file - useful for experimenting with new primes func LoadPrimesSet(filename string) (PrimesSet, error) { f, err := os.ReadFile(filename) if err != nil { @@ -39,6 +42,7 @@ func LoadPrimesSet(filename string) (PrimesSet, error) { return results, nil } +// the default PrimeSet used by the various Mishmash implementations var ThePrimes = PrimesSet{ 3825240899, 3652005211, 2966014067, 3432177659, 3109134187, 3139884271, 3108258589, 2277840529, 3748140223, 4206444373, 2684505017, 3883989821, 4076539213, 3880335997, 2603229667, 2358458953, @@ -74,6 +78,7 @@ var ThePrimes = PrimesSet{ 3154883449, 3782386073, 3324965471, 4088422453, 3784508591, 3903657481, 3010059277, 2936392909, } +// the mishmash engine - allows use of any prime set to allow experimentation func Engine(buffer []byte, length int, accumulator uint64, primes *PrimesSet) uint64 { for _, b := range buffer[:length] { accumulator1 := uint64(primes.Select(accumulator) + uint32(b)) @@ -84,19 +89,33 @@ func Engine(buffer []byte, length int, accumulator uint64, primes *PrimesSet) ui return accumulator } +// general mishmash function - carried over from C++ for feature parity func Mishmash(buffer []byte, length int) uint32 { accumulator := Engine(buffer, length, 0, &ThePrimes) return uint32(accumulator & UINT32_RESULTS_MASK) } +// general mishmash function - carried over from C++ for feature parity func MishmashString(buffer string) uint32 { return Mishmash([]byte(buffer), len(buffer)) } +// *Mishmash32 implements the hash.Hash and hash.Hash32 interfaces type Mishmash32 struct { accumulator uint64 } +// returns the uint32 hash as a byte slice +func (m *Mishmash32) Bytes() []byte { + hash := m.Sum32() + return []byte{ + byte((hash & 0xFF000000) >> 24), + byte((hash & 0x00FF0000) >> 16), + byte((hash & 0x0000FF00) >> 8), + byte((hash & 0x000000FF)), + } +} + func New32() hash.Hash32 { return &Mishmash32{} } @@ -104,20 +123,22 @@ func New32() hash.Hash32 { // Sum appends the current hash to b and returns the resulting slice. // It does not change the underlying hash state. func (m *Mishmash32) Sum(p []byte) []byte { - return nil + return append(p, m.Bytes()...) } +// returns the current uint32 hash func (m *Mishmash32) Sum32() uint32 { return uint32(m.accumulator & UINT32_RESULTS_MASK) } +// reset accumulator func (m *Mishmash32) Reset() { m.accumulator = 0 } // returns the number of bytes Sum() will return func (m *Mishmash32) Size() int { - return 8 + return 4 } // BlockSize returns the hash's underlying block size. @@ -135,10 +156,26 @@ func (m *Mishmash32) Write(p []byte) (int, error) { return len(p), nil } +// *Mishmash64 implements the hash.Hash and hash.Hash64 interfaces. type Mishmash64 struct { accumulator uint64 } +// returns the uint64 hash as a byte slice +func (m *Mishmash64) Bytes() []byte { + hash := m.accumulator + return []byte{ + byte((hash & 0xFF00000000000000) >> 56), + byte((hash & 0x00FF000000000000) >> 48), + byte((hash & 0x0000FF0000000000) >> 40), + byte((hash & 0x000000FF00000000) >> 32), + byte((hash & 0x00000000FF000000) >> 24), + byte((hash & 0x0000000000FF0000) >> 16), + byte((hash & 0x000000000000FF00) >> 8), + byte((hash & 0x00000000000000FF)), + } +} + func New64() hash.Hash64 { return &Mishmash64{} } @@ -146,20 +183,22 @@ func New64() hash.Hash64 { // Sum appends the current hash to b and returns the resulting slice. // It does not change the underlying hash state. func (m *Mishmash64) Sum(p []byte) []byte { - return nil + return append(p, m.Bytes()...) } +// returns the current uint64 hash func (m *Mishmash64) Sum64() uint64 { return m.accumulator } +// resets the accumulator to 0 func (m *Mishmash64) Reset() { m.accumulator = 0 } // returns the number of bytes Sum() will return func (m *Mishmash64) Size() int { - return 16 + return 8 } // BlockSize returns the hash's underlying block size. @@ -176,3 +215,50 @@ func (m *Mishmash64) Write(p []byte) (int, error) { m.accumulator = Engine(p, len(p), m.accumulator, &ThePrimes) return len(p), nil } + +// experimental and subject to change +// *Mishmash128 implements the hash.Hash interface +type Mishmash128 struct { + hash1, hash2 Mishmash64 +} + +func (m *Mishmash128) Bytes() []byte { + return append(m.hash1.Bytes(), m.hash2.Bytes()...) +} + +func New128() hash.Hash { + hash := &Mishmash128{} + hash.Reset() + return hash +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (m *Mishmash128) Sum(b []byte) []byte { + return append(b, m.Bytes()...) +} + +func (m *Mishmash128) Reset() { + m.hash1.accumulator, m.hash2.accumulator = MISHMASH128_DEFAULT_LEFT, MISHMASH128_DEFAULT_RIGHT +} + +// returns the number of bytes Sum() will return +func (m *Mishmash128) Size() int { + return 16 +} + +// BlockSize returns the hash's underlying block size. +// The Write method must be able to accept any amount +// of data, but it may operate more efficiently if all writes +// are a multiple of the block size. +func (m *Mishmash128) BlockSize() int { + return 1 +} + +// Write (via the embedded io.Writer interface) adds more data to the running hash. +// It never returns an error. +func (m *Mishmash128) Write(p []byte) (int, error) { + m.hash1.Write(p) + m.hash2.Write(p) + return len(p), nil +} diff --git a/mishmash_test.go b/mishmash_test.go index fb8271e..2a4e24e 100644 --- a/mishmash_test.go +++ b/mishmash_test.go @@ -2,9 +2,11 @@ package mishmash import ( "bufio" + "bytes" "errors" "fmt" "hash" + "math/big" "math/rand" "os" "strconv" @@ -127,3 +129,45 @@ func BenchmarkMishmash64(b *testing.B) { } } } + +func mask32(bits int) uint32 { + var t uint32 + for range bits { + t <<= 1 + t |= 1 + } + for range 32 - bits { + t <<= 1 + } + return t +} + +func TestSum32(t *testing.T) { + hash, ok := New32().(*Mishmash32) + if !ok { + t.Fatalf("error reflecting hash.Hash to *Mishmash32\n") + } + s := []byte("hello world!") + hash.Write(s) + accumulator := hash.accumulator + if want, got := big.NewInt(int64(accumulator&UINT32_RESULTS_MASK)).Bytes(), hash.Bytes(); !bytes.Equal(want, got) { + t.Fatalf("error: wanted %v; got %v\n", want, got) + } else if want, got := append(s, hash.Bytes()...), hash.Sum(s); !bytes.Equal(want, got) { + t.Fatalf("error: wanted %v; got %v\n", want, got) + } +} + +func TestSum64(t *testing.T) { + hash, ok := New64().(*Mishmash64) + if !ok { + t.Fatalf("error reflecting hash.Hash to *Mishmash64") + } + s := []byte("hello world!") + hash.Write(s) + accumulator := hash.accumulator + if want, got := big.NewInt(int64(accumulator)).Bytes(), hash.Bytes(); !bytes.Equal(want, got) { + t.Fatalf("error: wanted %v; got %v\n", want, got) + } else if want, got := append(s, hash.Bytes()...), hash.Sum(s); !bytes.Equal(want, got) { + t.Fatalf("error: wanted %v; got %v\n", want, got) + } +}