mishmash/mishmash.go

265 lines
9.4 KiB
Go
Raw Normal View History

2025-05-25 16:05:35 -04:00
package mishmash
import (
"fmt"
2025-05-26 19:16:56 -04:00
"hash"
2025-05-25 16:05:35 -04:00
"os"
"strconv"
"strings"
)
// Package-wide constants. The ALL_CAPS names are part of the exported API and
// are kept as-is for compatibility with existing callers.
const (
	// UINT32_RESULTS_MASK selects the low 32 bits of the 64-bit accumulator;
	// the 32-bit Mishmash variants apply it before truncating to uint32.
	UINT32_RESULTS_MASK uint64 = 0x00000000FFFFFFFF

	// NUM_PRIMES is the number of entries in a PrimesSet. PrimesSet.Select
	// indexes with n&(NUM_PRIMES-1), so this value must stay a power of two.
	NUM_PRIMES = 0x100

	// MISHMASH128_DEFAULT_LEFT and MISHMASH128_DEFAULT_RIGHT are the starting
	// accumulators that Mishmash128.Reset assigns to its first and second
	// 64-bit halves respectively.
	MISHMASH128_DEFAULT_LEFT, MISHMASH128_DEFAULT_RIGHT uint64 = 0, 0x8888888888888888
)
// PrimesSet is a fixed-size table of NUM_PRIMES uint32 values. Engine reads
// it through Select to pick the per-byte addend and multiplier; ThePrimes is
// the default table and LoadPrimesSet can read an alternative one from a file.
type PrimesSet [NUM_PRIMES]uint32 // fixed-size array of uint32 prime numbers
2025-05-25 16:05:35 -04:00
// Select returns the table entry chosen by the low bits of n: the index is
// n masked with NUM_PRIMES-1, so every uint64 maps to a valid slot.
func (p *PrimesSet) Select(n uint64) uint32 {
	const mask = NUM_PRIMES - 1
	slot := n & mask
	return p[slot]
}
// allows loading a primeset from a file - useful for experimenting with new primes
2025-05-25 16:05:35 -04:00
func LoadPrimesSet(filename string) (PrimesSet, error) {
f, err := os.ReadFile(filename)
if err != nil {
return PrimesSet{}, fmt.Errorf("error reading %s: %w", filename, err)
}
fields := strings.Fields(string(f))
if want, got := NUM_PRIMES, len(fields); want != got {
return PrimesSet{}, fmt.Errorf("error: requires %d primes (got %d)", want, got)
}
results := PrimesSet{}
for i, s := range fields {
if n, err := strconv.ParseUint(s, 10, 32); err != nil {
return PrimesSet{}, fmt.Errorf("error parsing %s: %w", s, err)
} else {
results[i] = uint32(n)
}
}
return results, nil
}
// the default PrimeSet used by the various Mishmash implementations
2025-05-25 16:05:35 -04:00
// ThePrimes is the default PrimesSet. Mishmash, Mishmash32.Write and
// Mishmash64.Write all pass &ThePrimes to Engine, so changing any entry
// changes the hashes they produce. It holds NUM_PRIMES (256) entries, laid
// out here as 32 rows of 8.
// NOTE(review): the entries are presumably all prime, as the name says;
// nothing in this file verifies that.
var ThePrimes = PrimesSet{
3825240899, 3652005211, 2966014067, 3432177659, 3109134187, 3139884271, 3108258589, 2277840529,
3748140223, 4206444373, 2684505017, 3883989821, 4076539213, 3880335997, 2603229667, 2358458953,
4061135443, 3826856503, 2671898833, 3266747401, 3403611587, 2483486933, 3889003891, 2820911959,
2318077829, 3470930861, 3231587809, 3225029887, 4123396483, 3422817119, 3612514831, 2170177423,
3058754837, 3000926393, 2825656217, 3387930461, 3532314017, 3245479361, 3466327211, 4080294503,
4252034179, 2302986211, 3394476707, 3697851029, 3957195257, 2862308587, 4285266071, 3681357247,
3157577413, 3839398561, 3097979117, 3590787463, 3354450497, 3110291879, 3938796493, 3196834463,
2374254481, 2702597567, 3046228397, 3461690719, 2641445467, 2401060583, 2483505539, 2775297373,
2262447391, 3118976533, 3014355683, 3355176449, 4055753381, 2277045713, 3098402119, 3894957487,
2770620887, 4125228329, 2575044467, 4162428989, 3294651817, 2308925797, 3698223103, 2150023273,
3075614681, 2410764047, 3624889381, 3264455489, 3241969651, 3001767217, 3407799859, 2998917373,
2629826653, 2714272271, 3987786247, 2880807353, 3608804803, 2231694917, 3790372403, 4156893413,
2563320007, 2423350621, 2735169119, 4021079791, 4150641413, 2907916357, 3772971647, 2481168307,
2842943119, 2234753693, 3966637117, 2732029457, 3207475039, 3533605151, 2349367747, 3336108011,
2431060103, 2263416899, 2350941683, 3869512277, 3880987697, 3062735029, 2512894603, 3669845519,
2235487739, 3201016501, 2438124943, 4170458909, 2938134889, 4231610087, 3187120061, 2378420137,
3365835877, 3078766697, 3704906059, 3541986781, 3969072823, 3510542281, 2306290751, 3898737419,
2898069347, 4092904481, 2484285403, 2721169823, 4293617527, 2928584759, 2213966141, 2335957513,
3367371923, 2965261109, 4175805451, 3541995157, 2964065479, 3997902791, 3053542259, 2168926237,
3253268639, 2620083509, 3314283407, 3873087809, 2636771209, 2737638653, 3209154931, 3414204793,
3451689091, 2638985941, 2899591693, 2654878441, 2748067627, 3395485733, 2679070523, 3100687721,
2520033701, 2980087373, 2873947007, 2565436501, 2400053783, 4163039563, 3517993571, 4263192407,
3385597069, 2768101117, 3502890653, 3092130347, 3748553827, 4109944849, 2418961109, 3398621741,
3073383031, 2167592489, 2950739053, 3529429811, 3167420899, 4254703357, 3344014309, 3725480141,
3745944539, 3456003191, 2832137237, 4202217191, 3730577581, 2837794231, 2155546451, 2539211039,
2256984649, 2458975411, 2986340839, 3412432363, 3596817463, 2973444983, 2409734297, 3273292601,
3302556869, 3630727567, 3670056499, 3300959521, 3949319809, 3047032057, 3412226563, 2147483647,
2914045411, 2882644273, 4065606553, 2735903059, 3195020617, 3887229457, 3232900987, 3409357867,
3037985513, 3162012463, 3340137193, 2186608547, 4018093523, 4153387103, 2566863161, 3087918809,
3332247019, 3579407009, 3082973791, 4178339461, 3269728331, 2270495261, 2400046513, 2641204147,
2593078337, 2398468271, 3861488311, 3766456459, 2970457213, 3491800771, 3797865553, 2756555203,
3154883449, 3782386073, 3324965471, 4088422453, 3784508591, 3903657481, 3010059277, 2936392909,
}
// the mishmash engine - allows use of any prime set to allow experimentation
2025-05-25 16:05:35 -04:00
// Engine runs the Mishmash mixing loop over the first length bytes of buffer,
// starting from accumulator, and returns the updated 64-bit state. The state
// is threaded through byte by byte, so an input may be fed in pieces by
// passing one call's result as the next call's accumulator.
//
// primes supplies the lookup table, which allows experimenting with tables
// other than ThePrimes; it is dereferenced for every byte processed.
// buffer[:length] is sliced directly, so an out-of-range length panics.
func Engine(buffer []byte, length int, accumulator uint64, primes *PrimesSet) uint64 {
	state := accumulator
	input := buffer[:length]
	for i := range input {
		octet := input[i]

		// State-selected prime plus the byte, added in 32 bits (wraps on
		// overflow) before being widened.
		sum := primes.Select(state) + uint32(octet)

		// Complemented state scaled by a byte-selected prime, modulo 2^64.
		product := ^state * uint64(primes.Select(uint64(octet)))

		// Fold the byte's low five bits with its top three to get 0..31,
		// giving a right shift of 32..63 that brings high state bits down.
		fold := (octet & 0x1F) ^ (octet >> 5)
		shifted := state >> (32 + fold)

		state = uint64(sum) + product + shifted
	}
	return state
}
// general mishmash function - carried over from C++ for feature parity
2025-05-25 16:05:35 -04:00
// Mishmash hashes the first length bytes of buffer with the default table
// (ThePrimes), starting from a zero accumulator, and returns the low 32 bits
// of the resulting state. The explicit length parameter is kept for parity
// with the C++ version this was carried over from.
func Mishmash(buffer []byte, length int) uint32 {
	const seed uint64 = 0
	state := Engine(buffer, length, seed, &ThePrimes)
	low := state & UINT32_RESULTS_MASK
	return uint32(low)
}
// general mishmash function - carried over from C++ for feature parity
2025-05-25 16:05:35 -04:00
// MishmashString is the string counterpart of Mishmash: it hashes all bytes
// of buffer and returns the 32-bit result. The string is converted to a byte
// slice first.
func MishmashString(buffer string) uint32 {
	raw := []byte(buffer)
	return Mishmash(raw, len(raw))
}
// *Mishmash32 implements the hash.Hash and hash.Hash32 interfaces
2025-05-25 16:05:35 -04:00
// Mishmash32 is a streaming Mishmash hasher with a 32-bit digest. Its
// pointer-receiver methods (Write, Sum, Sum32, Reset, Size, BlockSize) are
// what New32 returns as a hash.Hash32. The zero value starts from accumulator
// 0, the same state Reset restores.
type Mishmash32 struct {
// accumulator is the full 64-bit running state; Sum32 reports its low 32 bits.
accumulator uint64
}
// Bytes returns the current 32-bit hash (see Sum32) as a freshly allocated
// 4-byte slice, most significant byte first. The hash state is not modified.
func (m *Mishmash32) Bytes() []byte {
	sum := m.Sum32()
	out := make([]byte, 4)
	out[0] = byte(sum >> 24)
	out[1] = byte(sum >> 16)
	out[2] = byte(sum >> 8)
	out[3] = byte(sum)
	return out
}
2025-05-26 19:16:56 -04:00
// New32 returns a new hash.Hash32 backed by a *Mishmash32 whose accumulator
// starts at zero.
func New32() hash.Hash32 {
	return new(Mishmash32)
}
2025-05-25 16:05:35 -04:00
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (m *Mishmash32) Sum(p []byte) []byte {
return append(p, m.Bytes()...)
2025-05-25 16:05:35 -04:00
}
// returns the current uint32 hash
2025-05-25 16:05:35 -04:00
// Sum32 returns the current hash: the low 32 bits of the 64-bit accumulator.
// The state is not modified.
func (m *Mishmash32) Sum32() uint32 {
	low := m.accumulator & UINT32_RESULTS_MASK
	return uint32(low)
}
// reset accumulator
2025-05-25 16:05:35 -04:00
// Reset returns the hasher to its initial state, an accumulator of zero.
func (m *Mishmash32) Reset() {
	*m = Mishmash32{}
}
// returns the number of bytes Sum() will return
func (m *Mishmash32) Size() int {
return 4
2025-05-25 16:05:35 -04:00
}
// BlockSize reports the hash's block size, which is 1: input is consumed one
// byte at a time, so Write has no preferred chunk size.
func (m *Mishmash32) BlockSize() int {
	const bytesPerBlock = 1
	return bytesPerBlock
}
// Write feeds p into the running hash, continuing from the current
// accumulator and using the default table ThePrimes. It always reports
// len(p) bytes written and a nil error.
func (m *Mishmash32) Write(p []byte) (int, error) {
	n := len(p)
	m.accumulator = Engine(p, n, m.accumulator, &ThePrimes)
	return n, nil
}
// *Mishmash64 implements the hash.Hash and hash.Hash64 interfaces.
2025-05-25 16:05:35 -04:00
// Mishmash64 is a streaming Mishmash hasher with a 64-bit digest. Its
// pointer-receiver methods (Write, Sum, Sum64, Reset, Size, BlockSize) are
// what New64 returns as a hash.Hash64. The zero value starts from accumulator
// 0, the same state Reset restores.
type Mishmash64 struct {
// accumulator is the 64-bit running state; Sum64 returns it unchanged.
accumulator uint64
}
// Bytes returns the current 64-bit hash as a freshly allocated 8-byte slice,
// most significant byte first. The hash state is not modified.
func (m *Mishmash64) Bytes() []byte {
	out := make([]byte, 8)
	rest := m.accumulator
	// Fill from the last position backwards, peeling off the low byte each time.
	for i := len(out) - 1; i >= 0; i-- {
		out[i] = byte(rest)
		rest >>= 8
	}
	return out
}
2025-05-26 19:16:56 -04:00
// New64 returns a new hash.Hash64 backed by a *Mishmash64 whose accumulator
// starts at zero.
func New64() hash.Hash64 {
	return new(Mishmash64)
}
2025-05-25 16:05:35 -04:00
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (m *Mishmash64) Sum(p []byte) []byte {
return append(p, m.Bytes()...)
2025-05-25 16:05:35 -04:00
}
// returns the current uint64 hash
2025-05-25 16:05:35 -04:00
// Sum64 returns the current hash, which is the full 64-bit accumulator. The
// state is not modified.
func (m *Mishmash64) Sum64() uint64 {
	sum := m.accumulator
	return sum
}
// resets the accumulator to 0
2025-05-25 16:05:35 -04:00
// Reset returns the hasher to its initial state, an accumulator of zero.
func (m *Mishmash64) Reset() {
	*m = Mishmash64{}
}
// returns the number of bytes Sum() will return
func (m *Mishmash64) Size() int {
return 8
2025-05-25 16:05:35 -04:00
}
// BlockSize reports the hash's block size, which is 1: input is consumed one
// byte at a time, so Write has no preferred chunk size.
func (m *Mishmash64) BlockSize() int {
	const bytesPerBlock = 1
	return bytesPerBlock
}
// Write feeds p into the running hash, continuing from the current
// accumulator and using the default table ThePrimes. It always reports
// len(p) bytes written and a nil error.
func (m *Mishmash64) Write(p []byte) (int, error) {
	n := len(p)
	m.accumulator = Engine(p, n, m.accumulator, &ThePrimes)
	return n, nil
}
// Mishmash128 is experimental and subject to change.
//
// It pairs two Mishmash64 states that Write feeds with the same input. Reset
// gives them different starting accumulators (MISHMASH128_DEFAULT_LEFT and
// MISHMASH128_DEFAULT_RIGHT), and Bytes concatenates hash1 then hash2 into a
// 16-byte digest. *Mishmash128 is what New128 returns as a hash.Hash.
//
// New128 calls Reset for you. A zero-value Mishmash128 has both halves at
// accumulator 0 until Reset is called.
type Mishmash128 struct {
hash1, hash2 Mishmash64
}
// Bytes returns the current digest as a new slice: the bytes of hash1
// followed by the bytes of hash2. The hash state is not modified.
func (m *Mishmash128) Bytes() []byte {
	out := make([]byte, 0, 16)
	out = append(out, m.hash1.Bytes()...)
	out = append(out, m.hash2.Bytes()...)
	return out
}
// New128 returns a new hash.Hash backed by a *Mishmash128. Reset is called
// on it before it is returned, so both halves start from their default
// accumulators.
func New128() hash.Hash {
	h := new(Mishmash128)
	h.Reset()
	return h
}
// Sum appends the current digest (as produced by Bytes) to b and returns the
// resulting slice. It does not change the underlying hash state.
func (m *Mishmash128) Sum(b []byte) []byte {
	digest := m.Bytes()
	return append(b, digest...)
}
// Reset restores both halves to their starting accumulators: hash1 to
// MISHMASH128_DEFAULT_LEFT and hash2 to MISHMASH128_DEFAULT_RIGHT.
func (m *Mishmash128) Reset() {
	m.hash1.accumulator = MISHMASH128_DEFAULT_LEFT
	m.hash2.accumulator = MISHMASH128_DEFAULT_RIGHT
}
// Size returns the number of bytes Sum appends: 16, i.e. two 8-byte halves.
func (m *Mishmash128) Size() int {
	const digestBytes = 16
	return digestBytes
}
// BlockSize reports the hash's block size, which is 1: Write accepts input
// of any length and has no preferred chunk size.
func (m *Mishmash128) BlockSize() int {
	const bytesPerBlock = 1
	return bytesPerBlock
}
// Write feeds p into both 64-bit halves, hash1 first and then hash2. It
// always reports len(p) bytes written and a nil error; the results of the
// two inner Write calls are discarded.
func (m *Mishmash128) Write(p []byte) (int, error) {
	halves := [...]*Mishmash64{&m.hash1, &m.hash2}
	for _, half := range halves {
		_, _ = half.Write(p)
	}
	return len(p), nil
}