first commit
This commit is contained in:
commit
9f40f8d2d7
24
LICENSE.TXT
Normal file
24
LICENSE.TXT
Normal file
@ -0,0 +1,24 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2025 MicroNeil Research Corporation and Contributors
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
2
README.md
Normal file
2
README.md
Normal file
@ -0,0 +1,2 @@
|
||||
# mishmash
|
||||
Package mishmash implements the mishmash hash from Pete McNeil's [CodeDweller](https://code.microneil.com/madscientist/CodeDweller) and exposes it through Go's `hash.Hash32` and `hash.Hash64` interfaces for simple, idiomatic use in other Go programs.
|
5
go.mod
Normal file
5
go.mod
Normal file
@ -0,0 +1,5 @@
|
||||
module mishmash
|
||||
|
||||
go 1.24.3
|
||||
|
||||
require golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect
|
2
go.sum
Normal file
2
go.sum
Normal file
@ -0,0 +1,2 @@
|
||||
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 h1:y5zboxd6LQAqYIhHnB48p0ByQ/GnQx2BE33L8BOHQkI=
|
||||
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ=
|
169
mishmash.go
Normal file
169
mishmash.go
Normal file
@ -0,0 +1,169 @@
|
||||
package mishmash
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Package-wide constants used by the mishmash engine and its wrappers.
const (
	// UINT32_RESULTS_MASK selects the low 32 bits of a 64-bit accumulator.
	UINT32_RESULTS_MASK uint64 = 1<<32 - 1

	// NUM_PRIMES is the number of entries in a PrimesSet.
	NUM_PRIMES = 256
)
|
||||
|
||||
type PrimesSet [NUM_PRIMES]uint32
|
||||
|
||||
func (p *PrimesSet) Select(n uint64) uint32 {
|
||||
return (*p)[n&(NUM_PRIMES-1)]
|
||||
}
|
||||
|
||||
func LoadPrimesSet(filename string) (PrimesSet, error) {
|
||||
f, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
return PrimesSet{}, fmt.Errorf("error reading %s: %w", filename, err)
|
||||
}
|
||||
fields := strings.Fields(string(f))
|
||||
if want, got := NUM_PRIMES, len(fields); want != got {
|
||||
return PrimesSet{}, fmt.Errorf("error: requires %d primes (got %d)", want, got)
|
||||
}
|
||||
results := PrimesSet{}
|
||||
for i, s := range fields {
|
||||
if n, err := strconv.ParseUint(s, 10, 32); err != nil {
|
||||
return PrimesSet{}, fmt.Errorf("error parsing %s: %w", s, err)
|
||||
} else {
|
||||
results[i] = uint32(n)
|
||||
}
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// ThePrimes is the built-in PrimesSet. Mishmash, Mishmash32.Write and
// Mishmash64.Write all pass its address to Engine, so changing any entry
// changes every hash this package produces.
// NOTE(review): the values appear to mirror mishmash_primes.txt (first and
// last entries match) — confirm the two stay in sync.
var ThePrimes = PrimesSet{
|
||||
3825240899, 3652005211, 2966014067, 3432177659, 3109134187, 3139884271, 3108258589, 2277840529,
|
||||
3748140223, 4206444373, 2684505017, 3883989821, 4076539213, 3880335997, 2603229667, 2358458953,
|
||||
4061135443, 3826856503, 2671898833, 3266747401, 3403611587, 2483486933, 3889003891, 2820911959,
|
||||
2318077829, 3470930861, 3231587809, 3225029887, 4123396483, 3422817119, 3612514831, 2170177423,
|
||||
3058754837, 3000926393, 2825656217, 3387930461, 3532314017, 3245479361, 3466327211, 4080294503,
|
||||
4252034179, 2302986211, 3394476707, 3697851029, 3957195257, 2862308587, 4285266071, 3681357247,
|
||||
3157577413, 3839398561, 3097979117, 3590787463, 3354450497, 3110291879, 3938796493, 3196834463,
|
||||
2374254481, 2702597567, 3046228397, 3461690719, 2641445467, 2401060583, 2483505539, 2775297373,
|
||||
2262447391, 3118976533, 3014355683, 3355176449, 4055753381, 2277045713, 3098402119, 3894957487,
|
||||
2770620887, 4125228329, 2575044467, 4162428989, 3294651817, 2308925797, 3698223103, 2150023273,
|
||||
3075614681, 2410764047, 3624889381, 3264455489, 3241969651, 3001767217, 3407799859, 2998917373,
|
||||
2629826653, 2714272271, 3987786247, 2880807353, 3608804803, 2231694917, 3790372403, 4156893413,
|
||||
2563320007, 2423350621, 2735169119, 4021079791, 4150641413, 2907916357, 3772971647, 2481168307,
|
||||
2842943119, 2234753693, 3966637117, 2732029457, 3207475039, 3533605151, 2349367747, 3336108011,
|
||||
2431060103, 2263416899, 2350941683, 3869512277, 3880987697, 3062735029, 2512894603, 3669845519,
|
||||
2235487739, 3201016501, 2438124943, 4170458909, 2938134889, 4231610087, 3187120061, 2378420137,
|
||||
3365835877, 3078766697, 3704906059, 3541986781, 3969072823, 3510542281, 2306290751, 3898737419,
|
||||
2898069347, 4092904481, 2484285403, 2721169823, 4293617527, 2928584759, 2213966141, 2335957513,
|
||||
3367371923, 2965261109, 4175805451, 3541995157, 2964065479, 3997902791, 3053542259, 2168926237,
|
||||
3253268639, 2620083509, 3314283407, 3873087809, 2636771209, 2737638653, 3209154931, 3414204793,
|
||||
3451689091, 2638985941, 2899591693, 2654878441, 2748067627, 3395485733, 2679070523, 3100687721,
|
||||
2520033701, 2980087373, 2873947007, 2565436501, 2400053783, 4163039563, 3517993571, 4263192407,
|
||||
3385597069, 2768101117, 3502890653, 3092130347, 3748553827, 4109944849, 2418961109, 3398621741,
|
||||
3073383031, 2167592489, 2950739053, 3529429811, 3167420899, 4254703357, 3344014309, 3725480141,
|
||||
3745944539, 3456003191, 2832137237, 4202217191, 3730577581, 2837794231, 2155546451, 2539211039,
|
||||
2256984649, 2458975411, 2986340839, 3412432363, 3596817463, 2973444983, 2409734297, 3273292601,
|
||||
3302556869, 3630727567, 3670056499, 3300959521, 3949319809, 3047032057, 3412226563, 2147483647,
|
||||
2914045411, 2882644273, 4065606553, 2735903059, 3195020617, 3887229457, 3232900987, 3409357867,
|
||||
3037985513, 3162012463, 3340137193, 2186608547, 4018093523, 4153387103, 2566863161, 3087918809,
|
||||
3332247019, 3579407009, 3082973791, 4178339461, 3269728331, 2270495261, 2400046513, 2641204147,
|
||||
2593078337, 2398468271, 3861488311, 3766456459, 2970457213, 3491800771, 3797865553, 2756555203,
|
||||
3154883449, 3782386073, 3324965471, 4088422453, 3784508591, 3903657481, 3010059277, 2936392909,
|
||||
}
|
||||
|
||||
func Engine(buffer []byte, length int, accumulator uint64, primes *PrimesSet) uint64 {
|
||||
for _, b := range buffer[:length] {
|
||||
accumulator1 := uint64(primes.Select(accumulator) + uint32(b))
|
||||
accumulator2 := ^accumulator * uint64(primes.Select(uint64(b)))
|
||||
accumulator3 := accumulator >> (32 + ((b & 0x1F) ^ (b >> 5)))
|
||||
accumulator = accumulator1 + accumulator2 + accumulator3
|
||||
}
|
||||
return accumulator
|
||||
}
|
||||
|
||||
func Mishmash(buffer []byte, length int) uint32 {
|
||||
accumulator := Engine(buffer, length, 0, &ThePrimes)
|
||||
return uint32(accumulator & UINT32_RESULTS_MASK)
|
||||
}
|
||||
|
||||
func MishmashString(buffer string) uint32 {
|
||||
return Mishmash([]byte(buffer), len(buffer))
|
||||
}
|
||||
|
||||
// Mishmash32 is a streaming mishmash hasher that reports a 32-bit result.
// The zero value is ready to use.
type Mishmash32 struct {
	// accumulator is the running 64-bit state; only its low 32 bits are
	// reported by Sum and Sum32.
	accumulator uint64
}

// Sum appends the current 32-bit hash to p in big-endian byte order and
// returns the resulting slice. It does not change the underlying hash state.
func (m *Mishmash32) Sum(p []byte) []byte {
	v := m.Sum32()
	return append(p, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}

// Sum32 returns the low 32 bits of the accumulator.
func (m *Mishmash32) Sum32() uint32 {
	// A truncating conversion keeps exactly the low 32 bits.
	return uint32(m.accumulator)
}

// Reset returns the hasher to its initial state (a zero accumulator).
func (m *Mishmash32) Reset() {
	m.accumulator = 0
}

// Size returns the number of bytes Sum appends: 4 for a 32-bit hash.
func (m *Mishmash32) Size() int {
	return 4
}

// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
func (m *Mishmash32) BlockSize() int {
	return 1
}
|
||||
|
||||
// Write (via the embedded io.Writer interface) adds more data to the running hash.
|
||||
// It never returns an error.
|
||||
func (m *Mishmash32) Write(p []byte) (int, error) {
|
||||
m.accumulator = Engine(p, len(p), m.accumulator, &ThePrimes)
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
// Mishmash64 is a streaming mishmash hasher that reports the full 64-bit
// accumulator. The zero value is ready to use.
type Mishmash64 struct {
	// accumulator is the running 64-bit state reported by Sum and Sum64.
	accumulator uint64
}

// Sum appends the current 64-bit hash to p in big-endian byte order and
// returns the resulting slice. It does not change the underlying hash state.
func (m *Mishmash64) Sum(p []byte) []byte {
	v := m.accumulator
	return append(p,
		byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32),
		byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}

// Sum64 returns the current 64-bit accumulator.
func (m *Mishmash64) Sum64() uint64 {
	return m.accumulator
}

// Reset returns the hasher to its initial state (a zero accumulator).
func (m *Mishmash64) Reset() {
	m.accumulator = 0
}

// Size returns the number of bytes Sum appends: 8 for a 64-bit hash.
func (m *Mishmash64) Size() int {
	return 8
}

// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
func (m *Mishmash64) BlockSize() int {
	return 1
}
|
||||
|
||||
// Write (via the embedded io.Writer interface) adds more data to the running hash.
|
||||
// It never returns an error.
|
||||
func (m *Mishmash64) Write(p []byte) (int, error) {
|
||||
m.accumulator = Engine(p, len(p), m.accumulator, &ThePrimes)
|
||||
return len(p), nil
|
||||
}
|
256
mishmash_primes.txt
Normal file
256
mishmash_primes.txt
Normal file
@ -0,0 +1,256 @@
|
||||
3825240899
|
||||
3652005211
|
||||
2966014067
|
||||
3432177659
|
||||
3109134187
|
||||
3139884271
|
||||
3108258589
|
||||
2277840529
|
||||
3748140223
|
||||
4206444373
|
||||
2684505017
|
||||
3883989821
|
||||
4076539213
|
||||
3880335997
|
||||
2603229667
|
||||
2358458953
|
||||
4061135443
|
||||
3826856503
|
||||
2671898833
|
||||
3266747401
|
||||
3403611587
|
||||
2483486933
|
||||
3889003891
|
||||
2820911959
|
||||
2318077829
|
||||
3470930861
|
||||
3231587809
|
||||
3225029887
|
||||
4123396483
|
||||
3422817119
|
||||
3612514831
|
||||
2170177423
|
||||
3058754837
|
||||
3000926393
|
||||
2825656217
|
||||
3387930461
|
||||
3532314017
|
||||
3245479361
|
||||
3466327211
|
||||
4080294503
|
||||
4252034179
|
||||
2302986211
|
||||
3394476707
|
||||
3697851029
|
||||
3957195257
|
||||
2862308587
|
||||
4285266071
|
||||
3681357247
|
||||
3157577413
|
||||
3839398561
|
||||
3097979117
|
||||
3590787463
|
||||
3354450497
|
||||
3110291879
|
||||
3938796493
|
||||
3196834463
|
||||
2374254481
|
||||
2702597567
|
||||
3046228397
|
||||
3461690719
|
||||
2641445467
|
||||
2401060583
|
||||
2483505539
|
||||
2775297373
|
||||
2262447391
|
||||
3118976533
|
||||
3014355683
|
||||
3355176449
|
||||
4055753381
|
||||
2277045713
|
||||
3098402119
|
||||
3894957487
|
||||
2770620887
|
||||
4125228329
|
||||
2575044467
|
||||
4162428989
|
||||
3294651817
|
||||
2308925797
|
||||
3698223103
|
||||
2150023273
|
||||
3075614681
|
||||
2410764047
|
||||
3624889381
|
||||
3264455489
|
||||
3241969651
|
||||
3001767217
|
||||
3407799859
|
||||
2998917373
|
||||
2629826653
|
||||
2714272271
|
||||
3987786247
|
||||
2880807353
|
||||
3608804803
|
||||
2231694917
|
||||
3790372403
|
||||
4156893413
|
||||
2563320007
|
||||
2423350621
|
||||
2735169119
|
||||
4021079791
|
||||
4150641413
|
||||
2907916357
|
||||
3772971647
|
||||
2481168307
|
||||
2842943119
|
||||
2234753693
|
||||
3966637117
|
||||
2732029457
|
||||
3207475039
|
||||
3533605151
|
||||
2349367747
|
||||
3336108011
|
||||
2431060103
|
||||
2263416899
|
||||
2350941683
|
||||
3869512277
|
||||
3880987697
|
||||
3062735029
|
||||
2512894603
|
||||
3669845519
|
||||
2235487739
|
||||
3201016501
|
||||
2438124943
|
||||
4170458909
|
||||
2938134889
|
||||
4231610087
|
||||
3187120061
|
||||
2378420137
|
||||
3365835877
|
||||
3078766697
|
||||
3704906059
|
||||
3541986781
|
||||
3969072823
|
||||
3510542281
|
||||
2306290751
|
||||
3898737419
|
||||
2898069347
|
||||
4092904481
|
||||
2484285403
|
||||
2721169823
|
||||
4293617527
|
||||
2928584759
|
||||
2213966141
|
||||
2335957513
|
||||
3367371923
|
||||
2965261109
|
||||
4175805451
|
||||
3541995157
|
||||
2964065479
|
||||
3997902791
|
||||
3053542259
|
||||
2168926237
|
||||
3253268639
|
||||
2620083509
|
||||
3314283407
|
||||
3873087809
|
||||
2636771209
|
||||
2737638653
|
||||
3209154931
|
||||
3414204793
|
||||
3451689091
|
||||
2638985941
|
||||
2899591693
|
||||
2654878441
|
||||
2748067627
|
||||
3395485733
|
||||
2679070523
|
||||
3100687721
|
||||
2520033701
|
||||
2980087373
|
||||
2873947007
|
||||
2565436501
|
||||
2400053783
|
||||
4163039563
|
||||
3517993571
|
||||
4263192407
|
||||
3385597069
|
||||
2768101117
|
||||
3502890653
|
||||
3092130347
|
||||
3748553827
|
||||
4109944849
|
||||
2418961109
|
||||
3398621741
|
||||
3073383031
|
||||
2167592489
|
||||
2950739053
|
||||
3529429811
|
||||
3167420899
|
||||
4254703357
|
||||
3344014309
|
||||
3725480141
|
||||
3745944539
|
||||
3456003191
|
||||
2832137237
|
||||
4202217191
|
||||
3730577581
|
||||
2837794231
|
||||
2155546451
|
||||
2539211039
|
||||
2256984649
|
||||
2458975411
|
||||
2986340839
|
||||
3412432363
|
||||
3596817463
|
||||
2973444983
|
||||
2409734297
|
||||
3273292601
|
||||
3302556869
|
||||
3630727567
|
||||
3670056499
|
||||
3300959521
|
||||
3949319809
|
||||
3047032057
|
||||
3412226563
|
||||
2147483647
|
||||
2914045411
|
||||
2882644273
|
||||
4065606553
|
||||
2735903059
|
||||
3195020617
|
||||
3887229457
|
||||
3232900987
|
||||
3409357867
|
||||
3037985513
|
||||
3162012463
|
||||
3340137193
|
||||
2186608547
|
||||
4018093523
|
||||
4153387103
|
||||
2566863161
|
||||
3087918809
|
||||
3332247019
|
||||
3579407009
|
||||
3082973791
|
||||
4178339461
|
||||
3269728331
|
||||
2270495261
|
||||
2400046513
|
||||
2641204147
|
||||
2593078337
|
||||
2398468271
|
||||
3861488311
|
||||
3766456459
|
||||
2970457213
|
||||
3491800771
|
||||
3797865553
|
||||
2756555203
|
||||
3154883449
|
||||
3782386073
|
||||
3324965471
|
||||
4088422453
|
||||
3784508591
|
||||
3903657481
|
||||
3010059277
|
||||
2936392909
|
129
mishmash_test.go
Normal file
129
mishmash_test.go
Normal file
@ -0,0 +1,129 @@
|
||||
package mishmash
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash"
|
||||
"math/rand"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// HashedString is one fixture record: an input string together with the
// 32-bit and 64-bit hashes expected for it. Field order matters because the
// fixture loader builds values with a positional literal.
type HashedString struct {
	s   string // input text that was hashed
	h32 uint32 // expected 32-bit hash of s
	h64 uint64 // expected 64-bit hash of s
}
|
||||
|
||||
var (
|
||||
TheCppOutputFile = "word_hashes.txt"
|
||||
TheCppOutput = func() []HashedString {
|
||||
f, err := os.Open(TheCppOutputFile)
|
||||
if err != nil {
|
||||
panic("error opening " + TheCppOutputFile + ": " + err.Error())
|
||||
}
|
||||
defer f.Close()
|
||||
results := make([]HashedString, 0)
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
if line := strings.TrimSpace(scanner.Text()); len(line) > 0 {
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) == 3 {
|
||||
n1, err := strconv.ParseUint(fields[1], 16, 32)
|
||||
if err != nil {
|
||||
panic("error parsing " + fields[1] + ": " + err.Error())
|
||||
}
|
||||
n2, err := strconv.ParseUint(fields[2], 16, 64)
|
||||
if err != nil {
|
||||
panic("error parsing " + fields[2] + ": " + err.Error())
|
||||
}
|
||||
results = append(results, HashedString{fields[0], uint32(n1), n2})
|
||||
}
|
||||
}
|
||||
}
|
||||
return results
|
||||
}()
|
||||
RandomWordAndHash = func() HashedString {
|
||||
return TheCppOutput[rand.Intn(len(TheCppOutput))]
|
||||
}
|
||||
)
|
||||
|
||||
func TestPrimes(t *testing.T) {
|
||||
for _, kvp := range TheCppOutput {
|
||||
if word, want, got := kvp.s, kvp.h32, MishmashString(kvp.s); want != got {
|
||||
t.Fatalf("error: %s; wanted %08x; got %08x\n", word, want, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHash32Interface(t *testing.T) {
|
||||
for _, kvp := range TheCppOutput {
|
||||
var h hash.Hash32 = &Mishmash32{}
|
||||
word := kvp.s
|
||||
h.Write([]byte(word))
|
||||
if want, got := kvp.h32, h.Sum32(); want != got {
|
||||
t.Fatalf("error: %s; wanted %08x; got %08x\n", word, want, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHash64Interface(t *testing.T) {
|
||||
for _, kvp := range TheCppOutput {
|
||||
var h hash.Hash64 = &Mishmash64{}
|
||||
word := kvp.s
|
||||
h.Write([]byte(word))
|
||||
if want, got := kvp.h64, h.Sum64(); want != got {
|
||||
t.Fatalf("error: %s; wanted %016x; got %016x\n", word, want, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHash32Collision(t *testing.T) {
|
||||
m := make(map[uint32][]string, len(TheCppOutput))
|
||||
for _, kvp := range TheCppOutput {
|
||||
m[kvp.h32] = append(m[kvp.h32], kvp.s)
|
||||
}
|
||||
errs := make([]error, 0)
|
||||
for hash, values := range m {
|
||||
if len(values) > 1 {
|
||||
errs = append(errs, fmt.Errorf("%08x: %s", hash, strings.Join(values, " ")))
|
||||
}
|
||||
}
|
||||
if err := errors.Join(errs...); err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLoadEmbededPrimes(b *testing.B) {
|
||||
const filename = "mishmash_primes.txt"
|
||||
for _ = range b.N {
|
||||
if _, err := LoadPrimesSet(filename); err != nil {
|
||||
panic("error: " + err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMishmash32(b *testing.B) {
|
||||
for _ = range b.N {
|
||||
kvp := RandomWordAndHash()
|
||||
var hash hash.Hash32 = &Mishmash32{}
|
||||
hash.Write([]byte(kvp.s))
|
||||
if want, got := kvp.h32, hash.Sum32(); want != got {
|
||||
panic(fmt.Sprintf("error: %s; wanted %08x; got %08x\n", kvp.s, want, got))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMishmash64(b *testing.B) {
|
||||
for _ = range b.N {
|
||||
kvp := RandomWordAndHash()
|
||||
var hash hash.Hash64 = &Mishmash64{}
|
||||
hash.Write([]byte(kvp.s))
|
||||
if want, got := kvp.h64, hash.Sum64(); want != got {
|
||||
panic(fmt.Sprintf("error: %s; wanted %016x; got %016x\n", kvp.s, want, got))
|
||||
}
|
||||
}
|
||||
}
|
54763
word_hashes.txt
Normal file
54763
word_hashes.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user