Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Low Memory Profile (LMP) Hash #60

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions lmphash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
package lxr

import (
"fmt"
"io"
"os"
"os/user"
)

// LMPHash holds the parameters of the low memory profile hash together with
// an open handle to the ByteMap file that Hash consults for every table lookup.
type LMPHash struct {
	// fp is the file handle of the ByteMap file.
	fp *os.File
	// MapSize is the size of the translation table; Init sets it to 1 << MapSizeBits.
	MapSize uint64
	// MapSizeBits is the size of the ByteMap expressed as a number of bits.
	MapSizeBits uint64
	// Passes is the number of passes used to generate the rand table.
	Passes uint64
	// Seed is an arbitrary number used to create the tables.
	Seed uint64
	// HashSize is the number of bytes in the hash.
	HashSize uint64
	// verbose enables the progress output written by Log.
	verbose bool
}

// Verbose switches progress output on (true) or off (false). The flag is
// consulted by Log, which prints to the console only while it is set.
func (lmp *LMPHash) Verbose(val bool) {
	lmp.verbose = val
}

// Log prints msg on its own line to standard output, but only while verbose
// mode is enabled; otherwise the message is dropped.
func (lmp *LMPHash) Log(msg string) {
	if !lmp.verbose {
		return
	}
	fmt.Println(msg)
}

// Init initializes the hash with the given values and opens the matching
// ByteMap file.
//
// We use our own algorithm for initializing the map struct. This is a fairly large table of
// byte values we use to map bytes to other byte values to enhance the avalanche nature of the hash
// as well as increase the memory footprint of the hash.
//
// Seed is a 64 bit starting point
// MapSizeBits is the number of bits to use for the MapSize, i.e. 10 = mapsize of 1024
// HashSize is the number of bits in the hash; rounded up to a byte boundary
// Passes is the number of shuffles of the ByteMap performed. Each pass shuffles all byte values in the map
//
// Init panics if MapSizeBits is outside the range 8..34. It also panics (via
// openFile) if the ByteMap file for these parameters cannot be opened.
func (lmp *LMPHash) Init(Seed, MapSizeBits, HashSize, Passes uint64) {
	// Enforce both ends of the documented range. Without the upper bound a value
	// of 64 or more makes 1<<MapSizeBits evaluate to 0, which would turn the
	// index mask (MapSize-1) into all ones.
	if MapSizeBits < 8 || MapSizeBits > 34 {
		panic(fmt.Sprintf("Bad Map Size in Bits. Must be between 8 and 34 bits, was %d", MapSizeBits))
	}

	lmp.HashSize = (HashSize + 7) / 8 // bits -> bytes, rounding up
	lmp.MapSize = uint64(1) << MapSizeBits
	lmp.MapSizeBits = MapSizeBits
	lmp.Seed = Seed
	lmp.Passes = Passes
	lmp.openFile()
}

// openFile opens the ByteMap file that matches the configured Seed, Passes and
// MapSizeBits and stores the handle in lmp.fp.
//
// The file is expected at
//
//	<home>/.lxrhash/lxrhash-seed-<Seed in hex>-passes-<Passes>-size-<MapSizeBits>.dat
//
// where <home> is the current user's home directory. Only the handle is
// opened here; nothing is read from the file.
//
// openFile panics if the current user cannot be determined or if the file
// cannot be opened. The file is never generated by this type.
func (lmp *LMPHash) openFile() {
	u, err := user.Current()
	if err != nil {
		panic(err)
	}

	// The home directory is passed as an argument instead of being spliced into
	// the format string, so a '%' in the path cannot be misread as a verb.
	filename := fmt.Sprintf("%s/.lxrhash/lxrhash-seed-%x-passes-%d-size-%d.dat",
		u.HomeDir, lmp.Seed, lmp.Passes, lmp.MapSizeBits)

	fp, err := os.Open(filename)
	if err != nil {
		// Keep the established message as a prefix and append the underlying
		// error, which names the path and the real cause (missing file,
		// permissions, ...).
		panic(fmt.Sprintf("file doesn't exist -- file generation not supported in low mem profile hash: %v", err))
	}
	lmp.fp = fp
	lmp.Log("ByteMap file loaded into memory")
}

// Hash computes the hash of src and returns it as a slice of lmp.HashSize bytes.
//
// Every table lookup reads a single byte from the ByteMap file held in lmp.fp
// instead of indexing an in-memory table. src is walked twice (a fast pass
// followed by the full mixing pass) and the accumulated state is then reduced
// to one output byte per state word.
//
// Hash panics if a byte cannot be read from the ByteMap file, for example when
// the file is shorter than MapSize. Continuing after a failed read would
// silently produce a hash computed from stale data.
func (lmp LMPHash) Hash(src []byte) []byte {
	// Keep the byte intermediate results as uint64 values until reduced.
	hs := make([]uint64, lmp.HashSize)
	// as accumulates the state as we walk through applying the source data through the lookup map
	// and combine it with the state we are building up.
	var as = lmp.Seed
	// We keep a series of states, and roll them along through each byte of source processed.
	var s1, s2, s3 uint64
	// MapSize is expected to be a power of two (Init sets it to 1<<MapSizeBits),
	// so the index mask is the size-1.
	mk := lmp.MapSize - 1

	// B returns the ByteMap byte at index v&mk. ReadAt takes an explicit offset,
	// so a lookup neither depends on nor moves the file's seek position, and a
	// failed read is reported instead of being ignored.
	var buf [1]byte
	B := func(v uint64) uint64 {
		off := int64(v & mk)
		if _, err := lmp.fp.ReadAt(buf[:], off); err != nil {
			if err == io.EOF {
				panic(fmt.Sprintf("ByteMap file has no byte at offset %d; it is shorter than the map size %d", off, lmp.MapSize))
			}
			panic(fmt.Sprintf("reading ByteMap at offset %d: %v", off, err))
		}
		return uint64(buf[0])
	}
	b := func(v uint64) byte { return byte(B(v)) }

	// faststep is a cheap state update: one ByteMap lookup per source byte.
	faststep := func(v2 uint64, idx uint64) {
		m := B(as ^ v2)
		as = as<<7 ^ as>>5 ^ v2<<20 ^ v2<<16 ^ v2 ^ m<<20 ^ m<<12 ^ m<<4
		s1 = s1<<9 ^ s1>>3 ^ hs[idx]
		hs[idx] = s1 ^ as
		s1, s2, s3 = s3, s1, s2
	}

	// Define a function to move the state by one byte. This is not intended to be fast
	// Requires the previous byte read to process the next byte read. Forces serial evaluation
	// and removes the possibility of scheduling byte access.
	//
	// Shifts are not random. They are selected to ensure that prior bytes pulled from the
	// ByteMap contribute to the next access of the ByteMap, either by contributing to the
	// lower bits of the index, or in the upper bits that move the access further in the map.
	//
	// We also pay attention not only to where the ByteMap bits are applied, but what bits we
	// use in the indexing of the ByteMap.
	//
	// Tests run against this set of shifts show that the bytes pulled from the ByteMap are
	// evenly distributed over possible byte values (0-255) and indexes into the ByteMap are
	// also evenly distributed, and the deltas between bytes provided map to a curve expected
	// (fewer maximum and minimum deltas, and most deltas around zero).
	step := func(v2 uint64, idx uint64) {
		s1 = s1<<9 ^ s1>>1 ^ as ^ B(as>>5^v2)<<3
		s1 = s1<<5 ^ s1>>3 ^ B(s1^v2)<<7
		s1 = s1<<7 ^ s1>>7 ^ B(as^s1>>7)<<5
		s1 = s1<<11 ^ s1>>5 ^ B(v2^as>>11^s1)<<27

		hs[idx] = s1 ^ as ^ hs[idx]<<7 ^ hs[idx]>>13

		as = as<<17 ^ as>>5 ^ s1 ^ B(as^s1>>27^v2)<<3
		as = as<<13 ^ as>>3 ^ B(as^s1)<<7
		as = as<<15 ^ as>>7 ^ B(as>>7^s1)<<11
		as = as<<9 ^ as>>11 ^ B(v2^as^s1)<<3

		s1 = s1<<7 ^ s1>>27 ^ as ^ B(as>>3)<<13
		s1 = s1<<3 ^ s1>>13 ^ B(s1^v2)<<11
		s1 = s1<<8 ^ s1>>11 ^ B(as^s1>>11)<<9
		s1 = s1<<6 ^ s1>>9 ^ B(v2^as^s1)<<3

		as = as<<23 ^ as>>3 ^ s1 ^ B(as^v2^s1>>3)<<7
		as = as<<17 ^ as>>7 ^ B(as^s1>>3)<<5
		as = as<<13 ^ as>>5 ^ B(as>>5^s1)<<1
		as = as<<11 ^ as>>1 ^ B(v2^as^s1)<<7

		s1 = s1<<5 ^ s1>>3 ^ as ^ B(as>>7^s1>>3)<<6
		s1 = s1<<8 ^ s1>>6 ^ B(s1^v2)<<11
		s1 = s1<<11 ^ s1>>11 ^ B(as^s1>>11)<<5
		s1 = s1<<7 ^ s1>>5 ^ B(v2^as>>7^as^s1)<<17

		s2 = s2<<3 ^ s2>>17 ^ s1 ^ B(as^s2>>5^v2)<<13
		s2 = s2<<6 ^ s2>>13 ^ B(s2)<<11
		s2 = s2<<11 ^ s2>>11 ^ B(as^s1^s2>>11)<<23
		s2 = s2<<4 ^ s2>>23 ^ B(v2^as>>8^as^s2>>10)<<1

		s1 = s2<<3 ^ s2>>1 ^ hs[idx] ^ v2
		as = as<<9 ^ as>>7 ^ s1>>1 ^ B(s2>>1^hs[idx])<<5

		s1, s2, s3 = s3, s1, s2
	}

	idx := uint64(0)
	// Fast spin to prevent caching state
	for _, v2 := range src {
		if idx >= lmp.HashSize { // Use an if to avoid modulo math
			idx = 0
		}
		faststep(uint64(v2), idx)
		idx++
	}

	idx = 0
	// Actual work to compute the hash
	for _, v2 := range src {
		if idx >= lmp.HashSize { // Use an if to avoid modulo math
			idx = 0
		}
		step(uint64(v2), idx)
		idx++
	}

	// Reduction pass
	// Done by iterating over hs[] to produce the bytes[] hash
	//
	// At this point, we have one uint64 of state in hs for every byte of the hash. We need to
	// reduce each of them down to a byte, and we do so by doing a bit more bitwise math, and
	// mapping the values through our byte map.

	bytes := make([]byte, lmp.HashSize)
	// Roll over all the hs (one uint64 value for every byte in the resulting hash) and reduce them to byte values
	for i := len(hs) - 1; i >= 0; i-- {
		step(hs[i], uint64(i))      // Step the hash functions and then
		bytes[i] = b(as) ^ b(hs[i]) // Xor two resulting sequences
	}

	// Return the resulting hash
	return bytes
}