first commit

This commit is contained in:
William Dillon 2025-05-25 16:05:35 -04:00
commit 9f40f8d2d7
8 changed files with 55350 additions and 0 deletions

24
LICENSE.TXT Normal file
View File

@ -0,0 +1,24 @@
The MIT License (MIT)
Copyright (c) 2025 MicroNeil Research Corporation and Contributors
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

2
README.md Normal file
View File

@ -0,0 +1,2 @@
# mishmash
Package mishmash implements the mishmash hash from Pete McNeil's [CodeDweller](https://code.microneil.com/madscientist/CodeDweller), exposing it through Go's `hash.Hash32` and `hash.Hash64` interfaces for simple, idiomatic use in other Go programs.

5
go.mod Normal file
View File

@ -0,0 +1,5 @@
module mishmash
go 1.24.3
require golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect

2
go.sum Normal file
View File

@ -0,0 +1,2 @@
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 h1:y5zboxd6LQAqYIhHnB48p0ByQ/GnQx2BE33L8BOHQkI=
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ=

169
mishmash.go Normal file
View File

@ -0,0 +1,169 @@
package mishmash
import (
"fmt"
"os"
"strconv"
"strings"
)
const (
	// UINT32_RESULTS_MASK keeps only the low 32 bits of a 64-bit value.
	UINT32_RESULTS_MASK uint64 = 0x00000000FFFFFFFF
	// NUM_PRIMES is the number of entries held by a PrimesSet.
	NUM_PRIMES = 256
)

// PrimesSet is a fixed-size table of NUM_PRIMES 32-bit values.
type PrimesSet [NUM_PRIMES]uint32

// Select returns the table entry whose index is n reduced modulo NUM_PRIMES,
// so every uint64 maps onto a valid slot.
func (p *PrimesSet) Select(n uint64) uint32 {
	index := n % NUM_PRIMES
	return p[index]
}
// LoadPrimesSet reads filename and parses its whitespace-separated fields as
// base-10 unsigned 32-bit integers, in order, into a PrimesSet.
//
// The file must contain exactly NUM_PRIMES fields. On any read, count, or
// parse failure it returns a zero PrimesSet and a non-nil error.
func LoadPrimesSet(filename string) (PrimesSet, error) {
	raw, err := os.ReadFile(filename)
	if err != nil {
		return PrimesSet{}, fmt.Errorf("error reading %s: %w", filename, err)
	}

	tokens := strings.Fields(string(raw))
	if len(tokens) != NUM_PRIMES {
		return PrimesSet{}, fmt.Errorf("error: requires %d primes (got %d)", NUM_PRIMES, len(tokens))
	}

	var table PrimesSet
	for idx := range tokens {
		value, parseErr := strconv.ParseUint(tokens[idx], 10, 32)
		if parseErr != nil {
			return PrimesSet{}, fmt.Errorf("error parsing %s: %w", tokens[idx], parseErr)
		}
		table[idx] = uint32(value)
	}
	return table, nil
}
// ThePrimes is the built-in PrimesSet that Mishmash and the Write methods of
// Mishmash32 and Mishmash64 pass to Engine.
// NOTE(review): the values appear to mirror mishmash_primes.txt; confirm the
// two stay in sync if either is edited.
var ThePrimes = PrimesSet{
3825240899, 3652005211, 2966014067, 3432177659, 3109134187, 3139884271, 3108258589, 2277840529,
3748140223, 4206444373, 2684505017, 3883989821, 4076539213, 3880335997, 2603229667, 2358458953,
4061135443, 3826856503, 2671898833, 3266747401, 3403611587, 2483486933, 3889003891, 2820911959,
2318077829, 3470930861, 3231587809, 3225029887, 4123396483, 3422817119, 3612514831, 2170177423,
3058754837, 3000926393, 2825656217, 3387930461, 3532314017, 3245479361, 3466327211, 4080294503,
4252034179, 2302986211, 3394476707, 3697851029, 3957195257, 2862308587, 4285266071, 3681357247,
3157577413, 3839398561, 3097979117, 3590787463, 3354450497, 3110291879, 3938796493, 3196834463,
2374254481, 2702597567, 3046228397, 3461690719, 2641445467, 2401060583, 2483505539, 2775297373,
2262447391, 3118976533, 3014355683, 3355176449, 4055753381, 2277045713, 3098402119, 3894957487,
2770620887, 4125228329, 2575044467, 4162428989, 3294651817, 2308925797, 3698223103, 2150023273,
3075614681, 2410764047, 3624889381, 3264455489, 3241969651, 3001767217, 3407799859, 2998917373,
2629826653, 2714272271, 3987786247, 2880807353, 3608804803, 2231694917, 3790372403, 4156893413,
2563320007, 2423350621, 2735169119, 4021079791, 4150641413, 2907916357, 3772971647, 2481168307,
2842943119, 2234753693, 3966637117, 2732029457, 3207475039, 3533605151, 2349367747, 3336108011,
2431060103, 2263416899, 2350941683, 3869512277, 3880987697, 3062735029, 2512894603, 3669845519,
2235487739, 3201016501, 2438124943, 4170458909, 2938134889, 4231610087, 3187120061, 2378420137,
3365835877, 3078766697, 3704906059, 3541986781, 3969072823, 3510542281, 2306290751, 3898737419,
2898069347, 4092904481, 2484285403, 2721169823, 4293617527, 2928584759, 2213966141, 2335957513,
3367371923, 2965261109, 4175805451, 3541995157, 2964065479, 3997902791, 3053542259, 2168926237,
3253268639, 2620083509, 3314283407, 3873087809, 2636771209, 2737638653, 3209154931, 3414204793,
3451689091, 2638985941, 2899591693, 2654878441, 2748067627, 3395485733, 2679070523, 3100687721,
2520033701, 2980087373, 2873947007, 2565436501, 2400053783, 4163039563, 3517993571, 4263192407,
3385597069, 2768101117, 3502890653, 3092130347, 3748553827, 4109944849, 2418961109, 3398621741,
3073383031, 2167592489, 2950739053, 3529429811, 3167420899, 4254703357, 3344014309, 3725480141,
3745944539, 3456003191, 2832137237, 4202217191, 3730577581, 2837794231, 2155546451, 2539211039,
2256984649, 2458975411, 2986340839, 3412432363, 3596817463, 2973444983, 2409734297, 3273292601,
3302556869, 3630727567, 3670056499, 3300959521, 3949319809, 3047032057, 3412226563, 2147483647,
2914045411, 2882644273, 4065606553, 2735903059, 3195020617, 3887229457, 3232900987, 3409357867,
3037985513, 3162012463, 3340137193, 2186608547, 4018093523, 4153387103, 2566863161, 3087918809,
3332247019, 3579407009, 3082973791, 4178339461, 3269728331, 2270495261, 2400046513, 2641204147,
2593078337, 2398468271, 3861488311, 3766456459, 2970457213, 3491800771, 3797865553, 2756555203,
3154883449, 3782386073, 3324965471, 4088422453, 3784508591, 3903657481, 3010059277, 2936392909,
}
// Engine folds the first length bytes of buffer into accumulator and returns
// the updated accumulator. Passing the result of one call as the accumulator
// of the next continues the same running hash.
//
// For every input byte the new accumulator is the sum of three terms:
//   - a table entry selected by the current accumulator plus the byte, added
//     in 32-bit arithmetic (so it wraps) before being widened;
//   - the bitwise complement of the accumulator multiplied by the table
//     entry selected by the byte;
//   - the accumulator shifted right by a byte-dependent amount in [32, 63].
//
// length must be a valid upper bound for slicing buffer.
func Engine(buffer []byte, length int, accumulator uint64, primes *PrimesSet) uint64 {
	data := buffer[:length]
	for i := 0; i < len(data); i++ {
		b := data[i]
		// Low five bits XOR high three bits gives 0..31; the +32 moves the
		// shift into the upper half of the 64-bit word.
		shift := 32 + ((b & 0x1F) ^ (b >> 5))

		mixed := uint64(primes.Select(accumulator) + uint32(b))
		scaled := ^accumulator * uint64(primes.Select(uint64(b)))
		folded := accumulator >> shift

		accumulator = mixed + scaled + folded
	}
	return accumulator
}
// Mishmash hashes the first length bytes of buffer with ThePrimes, starting
// from a zero accumulator, and returns the low 32 bits of the result.
func Mishmash(buffer []byte, length int) uint32 {
	return uint32(Engine(buffer, length, 0, &ThePrimes) & UINT32_RESULTS_MASK)
}
// MishmashString returns the 32-bit Mishmash hash of all bytes of buffer.
func MishmashString(buffer string) uint32 {
	data := []byte(buffer)
	return Mishmash(data, len(data))
}
// Mishmash32 is a streaming 32-bit mishmash hash whose methods match the
// hash.Hash32 interface. The zero value is an empty hash ready for use.
type Mishmash32 struct {
	accumulator uint64
}

// Sum appends the current 32-bit hash to p in big-endian byte order and
// returns the resulting slice. It does not change the underlying hash state.
func (m *Mishmash32) Sum(p []byte) []byte {
	v := m.Sum32()
	return append(p, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}

// Sum32 returns the low 32 bits of the running accumulator.
func (m *Mishmash32) Sum32() uint32 {
	return uint32(m.accumulator & UINT32_RESULTS_MASK)
}

// Reset returns the hash to its initial (zero accumulator) state.
func (m *Mishmash32) Reset() {
	m.accumulator = 0
}

// Size returns the number of bytes Sum appends: 4 for a 32-bit hash.
func (m *Mishmash32) Size() int {
	return 4
}

// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size. Input is consumed one byte
// at a time, so the block size is 1.
func (m *Mishmash32) BlockSize() int {
	return 1
}

// Write adds the bytes of p to the running hash using ThePrimes.
// It always consumes all of p and never returns an error.
func (m *Mishmash32) Write(p []byte) (int, error) {
	m.accumulator = Engine(p, len(p), m.accumulator, &ThePrimes)
	return len(p), nil
}
// Mishmash64 is a streaming 64-bit mishmash hash whose methods match the
// hash.Hash64 interface. The zero value is an empty hash ready for use.
type Mishmash64 struct {
	accumulator uint64
}

// Sum appends the current 64-bit hash to p in big-endian byte order and
// returns the resulting slice. It does not change the underlying hash state.
func (m *Mishmash64) Sum(p []byte) []byte {
	v := m.Sum64()
	return append(p,
		byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32),
		byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}

// Sum64 returns the full 64-bit running accumulator.
func (m *Mishmash64) Sum64() uint64 {
	return m.accumulator
}

// Reset returns the hash to its initial (zero accumulator) state.
func (m *Mishmash64) Reset() {
	m.accumulator = 0
}

// Size returns the number of bytes Sum appends: 8 for a 64-bit hash.
func (m *Mishmash64) Size() int {
	return 8
}

// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size. Input is consumed one byte
// at a time, so the block size is 1.
func (m *Mishmash64) BlockSize() int {
	return 1
}

// Write adds the bytes of p to the running hash using ThePrimes.
// It always consumes all of p and never returns an error.
func (m *Mishmash64) Write(p []byte) (int, error) {
	m.accumulator = Engine(p, len(p), m.accumulator, &ThePrimes)
	return len(p), nil
}

256
mishmash_primes.txt Normal file
View File

@ -0,0 +1,256 @@
3825240899
3652005211
2966014067
3432177659
3109134187
3139884271
3108258589
2277840529
3748140223
4206444373
2684505017
3883989821
4076539213
3880335997
2603229667
2358458953
4061135443
3826856503
2671898833
3266747401
3403611587
2483486933
3889003891
2820911959
2318077829
3470930861
3231587809
3225029887
4123396483
3422817119
3612514831
2170177423
3058754837
3000926393
2825656217
3387930461
3532314017
3245479361
3466327211
4080294503
4252034179
2302986211
3394476707
3697851029
3957195257
2862308587
4285266071
3681357247
3157577413
3839398561
3097979117
3590787463
3354450497
3110291879
3938796493
3196834463
2374254481
2702597567
3046228397
3461690719
2641445467
2401060583
2483505539
2775297373
2262447391
3118976533
3014355683
3355176449
4055753381
2277045713
3098402119
3894957487
2770620887
4125228329
2575044467
4162428989
3294651817
2308925797
3698223103
2150023273
3075614681
2410764047
3624889381
3264455489
3241969651
3001767217
3407799859
2998917373
2629826653
2714272271
3987786247
2880807353
3608804803
2231694917
3790372403
4156893413
2563320007
2423350621
2735169119
4021079791
4150641413
2907916357
3772971647
2481168307
2842943119
2234753693
3966637117
2732029457
3207475039
3533605151
2349367747
3336108011
2431060103
2263416899
2350941683
3869512277
3880987697
3062735029
2512894603
3669845519
2235487739
3201016501
2438124943
4170458909
2938134889
4231610087
3187120061
2378420137
3365835877
3078766697
3704906059
3541986781
3969072823
3510542281
2306290751
3898737419
2898069347
4092904481
2484285403
2721169823
4293617527
2928584759
2213966141
2335957513
3367371923
2965261109
4175805451
3541995157
2964065479
3997902791
3053542259
2168926237
3253268639
2620083509
3314283407
3873087809
2636771209
2737638653
3209154931
3414204793
3451689091
2638985941
2899591693
2654878441
2748067627
3395485733
2679070523
3100687721
2520033701
2980087373
2873947007
2565436501
2400053783
4163039563
3517993571
4263192407
3385597069
2768101117
3502890653
3092130347
3748553827
4109944849
2418961109
3398621741
3073383031
2167592489
2950739053
3529429811
3167420899
4254703357
3344014309
3725480141
3745944539
3456003191
2832137237
4202217191
3730577581
2837794231
2155546451
2539211039
2256984649
2458975411
2986340839
3412432363
3596817463
2973444983
2409734297
3273292601
3302556869
3630727567
3670056499
3300959521
3949319809
3047032057
3412226563
2147483647
2914045411
2882644273
4065606553
2735903059
3195020617
3887229457
3232900987
3409357867
3037985513
3162012463
3340137193
2186608547
4018093523
4153387103
2566863161
3087918809
3332247019
3579407009
3082973791
4178339461
3269728331
2270495261
2400046513
2641204147
2593078337
2398468271
3861488311
3766456459
2970457213
3491800771
3797865553
2756555203
3154883449
3782386073
3324965471
4088422453
3784508591
3903657481
3010059277
2936392909

129
mishmash_test.go Normal file
View File

@ -0,0 +1,129 @@
package mishmash
import (
"bufio"
"errors"
"fmt"
"hash"
"math/rand"
"os"
"strconv"
"strings"
"testing"
)
// HashedString pairs a string with its expected hash values as read from the
// fixture file.
type HashedString struct {
// s is the input string.
s string
// h32 is the expected 32-bit hash of s.
h32 uint32
// h64 is the expected 64-bit hash of s.
h64 uint64
}
var (
	// TheCppOutputFile names the fixture file of expected hashes.
	// NOTE(review): presumably generated by the reference C++ implementation.
	TheCppOutputFile = "word_hashes.txt"

	// TheCppOutput holds every fixture entry, loaded once at package
	// initialization. Each usable line has three whitespace-separated
	// fields: the word, its 32-bit hash in hex, and its 64-bit hash in hex.
	// Lines with any other field count are skipped. Any I/O or parse failure
	// panics, since the tests cannot run without the fixture.
	TheCppOutput = func() []HashedString {
		f, err := os.Open(TheCppOutputFile)
		if err != nil {
			panic("error opening " + TheCppOutputFile + ": " + err.Error())
		}
		defer f.Close()

		var results []HashedString
		scanner := bufio.NewScanner(f)
		for scanner.Scan() {
			fields := strings.Fields(scanner.Text())
			if len(fields) != 3 {
				continue
			}
			n1, err := strconv.ParseUint(fields[1], 16, 32)
			if err != nil {
				panic("error parsing " + fields[1] + ": " + err.Error())
			}
			n2, err := strconv.ParseUint(fields[2], 16, 64)
			if err != nil {
				panic("error parsing " + fields[2] + ": " + err.Error())
			}
			results = append(results, HashedString{fields[0], uint32(n1), n2})
		}
		// Scan returning false may mean a read error or an over-long line
		// rather than EOF; surface it instead of using a truncated fixture.
		if err := scanner.Err(); err != nil {
			panic("error reading " + TheCppOutputFile + ": " + err.Error())
		}
		return results
	}()

	// RandomWordAndHash returns a uniformly chosen entry of TheCppOutput.
	RandomWordAndHash = func() HashedString {
		return TheCppOutput[rand.Intn(len(TheCppOutput))]
	}
)
// TestPrimes checks that MishmashString reproduces the expected 32-bit hash
// of every fixture word, stopping at the first mismatch.
func TestPrimes(t *testing.T) {
	for i := range TheCppOutput {
		entry := TheCppOutput[i]
		got := MishmashString(entry.s)
		if got != entry.h32 {
			t.Fatalf("error: %s; wanted %08x; got %08x\n", entry.s, entry.h32, got)
		}
	}
}
// TestHash32Interface drives Mishmash32 through the hash.Hash32 interface and
// checks Sum32 against the expected 32-bit hash of every fixture word.
func TestHash32Interface(t *testing.T) {
	for _, entry := range TheCppOutput {
		var hasher hash.Hash32 = &Mishmash32{}
		hasher.Write([]byte(entry.s))
		got := hasher.Sum32()
		if entry.h32 == got {
			continue
		}
		t.Fatalf("error: %s; wanted %08x; got %08x\n", entry.s, entry.h32, got)
	}
}
// TestHash64Interface drives Mishmash64 through the hash.Hash64 interface and
// checks Sum64 against the expected 64-bit hash of every fixture word.
func TestHash64Interface(t *testing.T) {
	for _, entry := range TheCppOutput {
		var hasher hash.Hash64 = &Mishmash64{}
		hasher.Write([]byte(entry.s))
		got := hasher.Sum64()
		if entry.h64 == got {
			continue
		}
		t.Fatalf("error: %s; wanted %016x; got %016x\n", entry.s, entry.h64, got)
	}
}
// TestHash32Collision groups the fixture words by their expected 32-bit hash
// and reports every hash shared by more than one word. Collisions are logged
// through the test framework (visible with -v) rather than treated as
// failures: this test is informational only.
func TestHash32Collision(t *testing.T) {
	byHash := make(map[uint32][]string, len(TheCppOutput))
	for _, kvp := range TheCppOutput {
		byHash[kvp.h32] = append(byHash[kvp.h32], kvp.s)
	}

	var errs []error
	for sum, words := range byHash {
		if len(words) > 1 {
			errs = append(errs, fmt.Errorf("%08x: %s", sum, strings.Join(words, " ")))
		}
	}

	// errors.Join returns nil when there is nothing to report.
	if err := errors.Join(errs...); err != nil {
		t.Logf("%d colliding 32-bit hashes:\n%v", len(errs), err)
	}
}
func BenchmarkLoadEmbededPrimes(b *testing.B) {
const filename = "mishmash_primes.txt"
for _ = range b.N {
if _, err := LoadPrimesSet(filename); err != nil {
panic("error: " + err.Error())
}
}
}
func BenchmarkMishmash32(b *testing.B) {
for _ = range b.N {
kvp := RandomWordAndHash()
var hash hash.Hash32 = &Mishmash32{}
hash.Write([]byte(kvp.s))
if want, got := kvp.h32, hash.Sum32(); want != got {
panic(fmt.Sprintf("error: %s; wanted %08x; got %08x\n", kvp.s, want, got))
}
}
}
func BenchmarkMishmash64(b *testing.B) {
for _ = range b.N {
kvp := RandomWordAndHash()
var hash hash.Hash64 = &Mishmash64{}
hash.Write([]byte(kvp.s))
if want, got := kvp.h64, hash.Sum64(); want != got {
panic(fmt.Sprintf("error: %s; wanted %016x; got %016x\n", kvp.s, want, got))
}
}
}

54763
word_hashes.txt Normal file

File diff suppressed because it is too large Load Diff