fixing some bugs, implementing the Bytes() methods and adding Mishmash128

This commit is contained in:
William Dillon 2025-05-27 20:03:25 -04:00
parent 45b33e8013
commit bea4366cf2
3 changed files with 140 additions and 8 deletions

View File

@ -1,2 +1,4 @@
# mishmash # mishmash
package mishmash implements the mishmash hash from Pete McNeil's [CodeDweller](https://code.microneil.com/madscientist/CodeDweller) using Go's hash.Hash32 and hash.Hash64 interfaces for simple idiomatic use in other Go programs. package mishmash implements the mishmash hash from Pete McNeil's [CodeDweller](https://code.microneil.com/madscientist/CodeDweller) using Go's hash.Hash32 and hash.Hash64 interfaces for simple idiomatic use in other Go programs.
It includes tests that verify the hashes produced by this package match those produced by the version of CodeDweller linked above.

View File

@ -9,16 +9,19 @@ import (
) )
const ( const (
UINT32_RESULTS_MASK uint64 = 0x00000000FFFFFFFF UINT32_RESULTS_MASK uint64 = 0x00000000FFFFFFFF // used to mask the uint64 accumulator used by Mishmash
NUM_PRIMES = 0x100 NUM_PRIMES = 0x100 // sizeof PrimeSet
MISHMASH128_DEFAULT_LEFT, MISHMASH128_DEFAULT_RIGHT uint64 = 0, 0x8888888888888888 // default accumulator values used by 128-bit Mishmash
) )
type PrimesSet [NUM_PRIMES]uint32 type PrimesSet [NUM_PRIMES]uint32 // typedef for an array of uint32 prime numbers
func (p *PrimesSet) Select(n uint64) uint32 { func (p *PrimesSet) Select(n uint64) uint32 {
return (*p)[n&(NUM_PRIMES-1)] return (*p)[n&(NUM_PRIMES-1)]
} }
// allows loading a primeset from a file - useful for experimenting with new primes
func LoadPrimesSet(filename string) (PrimesSet, error) { func LoadPrimesSet(filename string) (PrimesSet, error) {
f, err := os.ReadFile(filename) f, err := os.ReadFile(filename)
if err != nil { if err != nil {
@ -39,6 +42,7 @@ func LoadPrimesSet(filename string) (PrimesSet, error) {
return results, nil return results, nil
} }
// the default PrimeSet used by the various Mishmash implementations
var ThePrimes = PrimesSet{ var ThePrimes = PrimesSet{
3825240899, 3652005211, 2966014067, 3432177659, 3109134187, 3139884271, 3108258589, 2277840529, 3825240899, 3652005211, 2966014067, 3432177659, 3109134187, 3139884271, 3108258589, 2277840529,
3748140223, 4206444373, 2684505017, 3883989821, 4076539213, 3880335997, 2603229667, 2358458953, 3748140223, 4206444373, 2684505017, 3883989821, 4076539213, 3880335997, 2603229667, 2358458953,
@ -74,6 +78,7 @@ var ThePrimes = PrimesSet{
3154883449, 3782386073, 3324965471, 4088422453, 3784508591, 3903657481, 3010059277, 2936392909, 3154883449, 3782386073, 3324965471, 4088422453, 3784508591, 3903657481, 3010059277, 2936392909,
} }
// the mishmash engine - allows use of any prime set to allow experimentation
func Engine(buffer []byte, length int, accumulator uint64, primes *PrimesSet) uint64 { func Engine(buffer []byte, length int, accumulator uint64, primes *PrimesSet) uint64 {
for _, b := range buffer[:length] { for _, b := range buffer[:length] {
accumulator1 := uint64(primes.Select(accumulator) + uint32(b)) accumulator1 := uint64(primes.Select(accumulator) + uint32(b))
@ -84,19 +89,33 @@ func Engine(buffer []byte, length int, accumulator uint64, primes *PrimesSet) ui
return accumulator return accumulator
} }
// general mishmash function - carried over from C++ for feature parity
func Mishmash(buffer []byte, length int) uint32 { func Mishmash(buffer []byte, length int) uint32 {
accumulator := Engine(buffer, length, 0, &ThePrimes) accumulator := Engine(buffer, length, 0, &ThePrimes)
return uint32(accumulator & UINT32_RESULTS_MASK) return uint32(accumulator & UINT32_RESULTS_MASK)
} }
// general mishmash function - carried over from C++ for feature parity
func MishmashString(buffer string) uint32 { func MishmashString(buffer string) uint32 {
return Mishmash([]byte(buffer), len(buffer)) return Mishmash([]byte(buffer), len(buffer))
} }
// *Mishmash32 implements the hash.Hash and hash.Hash32 interfaces
type Mishmash32 struct { type Mishmash32 struct {
accumulator uint64 accumulator uint64
} }
// Bytes returns the current 32-bit hash (as reported by Sum32) as a four-byte
// slice, most significant byte first.
func (m *Mishmash32) Bytes() []byte {
	sum := m.Sum32()
	// Converting to byte keeps only the low eight bits, so a plain shift
	// selects each byte without an explicit mask.
	return []byte{
		byte(sum >> 24),
		byte(sum >> 16),
		byte(sum >> 8),
		byte(sum),
	}
}
func New32() hash.Hash32 { func New32() hash.Hash32 {
return &Mishmash32{} return &Mishmash32{}
} }
@ -104,20 +123,22 @@ func New32() hash.Hash32 {
// Sum appends the current hash to b and returns the resulting slice. // Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state. // It does not change the underlying hash state.
func (m *Mishmash32) Sum(p []byte) []byte { func (m *Mishmash32) Sum(p []byte) []byte {
return nil return append(p, m.Bytes()...)
} }
// returns the current uint32 hash
func (m *Mishmash32) Sum32() uint32 { func (m *Mishmash32) Sum32() uint32 {
return uint32(m.accumulator & UINT32_RESULTS_MASK) return uint32(m.accumulator & UINT32_RESULTS_MASK)
} }
// reset accumulator
func (m *Mishmash32) Reset() { func (m *Mishmash32) Reset() {
m.accumulator = 0 m.accumulator = 0
} }
// returns the number of bytes Sum() will return // returns the number of bytes Sum() will return
func (m *Mishmash32) Size() int { func (m *Mishmash32) Size() int {
return 8 return 4
} }
// BlockSize returns the hash's underlying block size. // BlockSize returns the hash's underlying block size.
@ -135,10 +156,26 @@ func (m *Mishmash32) Write(p []byte) (int, error) {
return len(p), nil return len(p), nil
} }
// *Mishmash64 implements the hash.Hash and hash.Hash64 interfaces.
type Mishmash64 struct { type Mishmash64 struct {
accumulator uint64 accumulator uint64
} }
// Bytes returns the current 64-bit accumulator as an eight-byte slice, most
// significant byte first.
func (m *Mishmash64) Bytes() []byte {
	const width = 8
	out := make([]byte, width)
	acc := m.accumulator
	// Fill from the last position backwards, peeling off the low byte each time.
	for i := width - 1; i >= 0; i-- {
		out[i] = byte(acc)
		acc >>= 8
	}
	return out
}
func New64() hash.Hash64 { func New64() hash.Hash64 {
return &Mishmash64{} return &Mishmash64{}
} }
@ -146,20 +183,22 @@ func New64() hash.Hash64 {
// Sum appends the current hash to b and returns the resulting slice. // Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state. // It does not change the underlying hash state.
func (m *Mishmash64) Sum(p []byte) []byte { func (m *Mishmash64) Sum(p []byte) []byte {
return nil return append(p, m.Bytes()...)
} }
// returns the current uint64 hash
func (m *Mishmash64) Sum64() uint64 { func (m *Mishmash64) Sum64() uint64 {
return m.accumulator return m.accumulator
} }
// resets the accumulator to 0
func (m *Mishmash64) Reset() { func (m *Mishmash64) Reset() {
m.accumulator = 0 m.accumulator = 0
} }
// returns the number of bytes Sum() will return // returns the number of bytes Sum() will return
func (m *Mishmash64) Size() int { func (m *Mishmash64) Size() int {
return 16 return 8
} }
// BlockSize returns the hash's underlying block size. // BlockSize returns the hash's underlying block size.
@ -176,3 +215,50 @@ func (m *Mishmash64) Write(p []byte) (int, error) {
m.accumulator = Engine(p, len(p), m.accumulator, &ThePrimes) m.accumulator = Engine(p, len(p), m.accumulator, &ThePrimes)
return len(p), nil return len(p), nil
} }
// Mishmash128 is an experimental 128-bit hash built from two Mishmash64
// states; it is subject to change. *Mishmash128 implements the hash.Hash
// interface.
type Mishmash128 struct {
	hash1 Mishmash64 // supplies the first eight bytes of the digest
	hash2 Mishmash64 // supplies the last eight bytes of the digest
}
// Bytes returns the 16-byte digest: the bytes of the first 64-bit half
// followed by the bytes of the second.
func (m *Mishmash128) Bytes() []byte {
	out := make([]byte, 0, 16)
	out = append(out, m.hash1.Bytes()...)
	out = append(out, m.hash2.Bytes()...)
	return out
}
// New128 returns a hash.Hash backed by a *Mishmash128 whose two halves have
// been set to their default starting accumulators via Reset.
func New128() hash.Hash {
	h := new(Mishmash128)
	h.Reset()
	return h
}
// Sum appends the current 16-byte digest to b and returns the resulting
// slice. The hash state is left unchanged.
func (m *Mishmash128) Sum(b []byte) []byte {
	digest := m.Bytes()
	return append(b, digest...)
}
// Reset restores both halves to their default starting accumulators. The two
// halves start from different values.
func (m *Mishmash128) Reset() {
	m.hash1.accumulator = MISHMASH128_DEFAULT_LEFT
	m.hash2.accumulator = MISHMASH128_DEFAULT_RIGHT
}
// Size returns the number of bytes Sum appends: 16.
func (m *Mishmash128) Size() int {
	const halfSize = 8 // bytes contributed by each 64-bit half
	return 2 * halfSize
}
// BlockSize returns the hash's underlying block size, which is 1.
// Write accepts any amount of data; per the hash.Hash contract, writes that
// are a multiple of the block size may be more efficient.
func (m *Mishmash128) BlockSize() int {
	const blockSize = 1
	return blockSize
}
// Write adds p to the running hash by feeding the same bytes to both 64-bit
// halves. It always reports len(p) bytes written and a nil error.
func (m *Mishmash128) Write(p []byte) (int, error) {
	for _, half := range []*Mishmash64{&m.hash1, &m.hash2} {
		half.Write(p)
	}
	return len(p), nil
}

View File

@ -2,9 +2,11 @@ package mishmash
import ( import (
"bufio" "bufio"
"bytes"
"errors" "errors"
"fmt" "fmt"
"hash" "hash"
"math/big"
"math/rand" "math/rand"
"os" "os"
"strconv" "strconv"
@ -127,3 +129,45 @@ func BenchmarkMishmash64(b *testing.B) {
} }
} }
} }
// mask32 returns a uint32 whose top `bits` bits are set and whose remaining
// low bits are clear. Values of bits at or below 0 give 0; values at or above
// 32 give a mask with every bit set.
func mask32(bits int) uint32 {
	switch {
	case bits <= 0:
		return 0
	case bits >= 32:
		return ^uint32(0)
	}
	// Start from all ones and push zeros in from the right.
	return ^uint32(0) << (32 - bits)
}
// TestSum32 checks that Bytes returns the masked 32-bit accumulator as four
// big-endian bytes and that Sum appends exactly those bytes to its argument.
func TestSum32(t *testing.T) {
	h, ok := New32().(*Mishmash32)
	if !ok {
		t.Fatalf("error reflecting hash.Hash to *Mishmash32\n")
	}
	s := []byte("hello world!")
	h.Write(s)

	// FillBytes produces a fixed-width, zero-padded big-endian encoding.
	// big.Int.Bytes would drop leading zero bytes and yield fewer than four
	// bytes whenever the top byte of the hash is zero.
	digest := new(big.Int).SetUint64(h.accumulator & UINT32_RESULTS_MASK).FillBytes(make([]byte, 4))
	if got := h.Bytes(); !bytes.Equal(digest, got) {
		t.Fatalf("error: wanted %v; got %v\n", digest, got)
	}

	// Clamp the prefix's capacity so this append allocates a fresh array.
	// Otherwise the expected slice and the result of Sum(s) could both be
	// written into the spare capacity of s and compare equal trivially.
	want := append(s[:len(s):len(s)], digest...)
	if got := h.Sum(s); !bytes.Equal(want, got) {
		t.Fatalf("error: wanted %v; got %v\n", want, got)
	}
}
// TestSum64 checks that Bytes returns the 64-bit accumulator as eight
// big-endian bytes and that Sum appends exactly those bytes to its argument.
func TestSum64(t *testing.T) {
	h, ok := New64().(*Mishmash64)
	if !ok {
		t.Fatalf("error reflecting hash.Hash to *Mishmash64")
	}
	s := []byte("hello world!")
	h.Write(s)

	// SetUint64 keeps the value unsigned: converting through int64 would make
	// any accumulator with its top bit set negative, and big.Int.Bytes then
	// encodes the absolute value. FillBytes also keeps the width fixed at
	// eight bytes instead of dropping leading zero bytes.
	digest := new(big.Int).SetUint64(h.accumulator).FillBytes(make([]byte, 8))
	if got := h.Bytes(); !bytes.Equal(digest, got) {
		t.Fatalf("error: wanted %v; got %v\n", digest, got)
	}

	// Clamp the prefix's capacity so the expected slice never shares a
	// backing array with the slice returned by Sum(s).
	want := append(s[:len(s):len(s)], digest...)
	if got := h.Sum(s); !bytes.Equal(want, got) {
		t.Fatalf("error: wanted %v; got %v\n", want, got)
	}
}