Skip to content

Commit

Permalink
add lsh
Browse files Browse the repository at this point in the history
  • Loading branch information
sgreben committed Oct 8, 2024
1 parent 69ce465 commit 45196fd
Show file tree
Hide file tree
Showing 20 changed files with 1,328 additions and 90 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
*.out
*.txt
testdata
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ If you need to classify **binary feature vectors that fit into `uint64`s**, this

You can optionally weigh class votes by distance, or specify different vote values per data point.

The sub-package [`lsh`](https://pkg.go.dev/github.com/keilerkonzept/bitknn/lsh) implements several [Locality-Sensitive Hashing (LSH)](https://en.wikipedia.org/wiki/Locality-sensitive_hashing) schemes for `uint64` feature vectors.

**Contents**
- [Usage](#usage)
- [Options](#options)
Expand Down Expand Up @@ -56,6 +58,7 @@ func main() {
- `WithDistanceWeightingFunc(f func(dist int) float64)`: Use a custom distance weighting function.
- `WithValues(values []float64)`: Assign vote values for each data point.


## Benchmarks

```
Expand Down
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ module github.com/keilerkonzept/bitknn

go 1.23.0

require github.com/google/go-cmp v0.6.0
require (
github.com/google/go-cmp v0.6.0
pgregory.net/rapid v1.1.0
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
pgregory.net/rapid v1.1.0 h1:CMa0sjHSru3puNx+J0MIAuiiEV4N0qj8/cMWGBBCsjw=
pgregory.net/rapid v1.1.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04=
59 changes: 59 additions & 0 deletions internal/slice/count_unique.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package slice

// CountUniqueInSorted returns the number of runs of consecutive equal
// elements in s. When equal values are adjacent (for example, when s is
// sorted) this is the number of distinct values. An empty or nil slice
// yields 0.
func CountUniqueInSorted[T comparable](s []T) int {
	if len(s) == 0 {
		return 0
	}
	// The first element always opens a run; every later element that differs
	// from its immediate predecessor opens another one.
	runs := 1
	for i := 1; i < len(s); i++ {
		if s[i] != s[i-1] {
			runs++
		}
	}
	return runs
}

// IndexRange describes a contiguous run of slice positions: Offset is the
// index of the first position in the run and Length is the number of
// positions it spans.
type IndexRange struct {
	Offset, Length int
}

// GroupSorted splits the positions of sKeys into runs of consecutive equal
// keys. It returns a map from each key to the IndexRange of its run, and the
// keys in order of first appearance.
//
// sKeys is expected to have equal keys adjacent (e.g. sorted); if a key
// appears in several separate runs, the map keeps only the last run while the
// returned key slice lists the key once per run.
//
// s is used only for its length: the final run's Length is computed as
// len(s) minus the run's Offset, so s should have the same length as sKeys.
//
// For empty sKeys both results are empty (and non-nil); no group is reported.
func GroupSorted[E any, K comparable](s []E, sKeys []K) (map[K]IndexRange, []K) {
	numGroups := CountUniqueInSorted(sKeys)
	groups := make(map[K]IndexRange, numGroups)
	keys := make([]K, 0, numGroups)
	if len(sKeys) == 0 {
		// Without this guard the closing-run bookkeeping below would record a
		// bogus group under the zero-value key.
		return groups, keys
	}

	start := 0 // index where the currently open run begins
	for i := 1; i < len(sKeys); i++ {
		if sKeys[i] == sKeys[start] {
			continue
		}
		// Key changed: close the open run and start a new one at i.
		groups[sKeys[start]] = IndexRange{Offset: start, Length: i - start}
		keys = append(keys, sKeys[start])
		start = i
	}
	// Close the last run, which extends to the end of s.
	groups[sKeys[start]] = IndexRange{Offset: start, Length: len(s) - start}
	keys = append(keys, sKeys[start])
	return groups, keys
}
14 changes: 14 additions & 0 deletions internal/slice/or_alloc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package slice

// OrAlloc returns a slice of length n, reusing the backing array of s when
// its capacity allows and allocating a fresh slice otherwise.
//
// When s is reused, the result is s resliced to n elements: shrinking keeps
// the leading elements, and growing within capacity exposes whatever the
// backing array already holds in the added positions. A freshly allocated
// result is zero-valued and does not copy anything from s.
func OrAlloc[T any](s []T, n int) []T {
	if n <= cap(s) {
		return s[:n]
	}
	return make([]T, n)
}
10 changes: 10 additions & 0 deletions internal/slice/reorder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package slice

// ReorderInPlace applies the ordering described by indices using only calls
// to swap. For each position, taken in increasing order, it calls swap once
// with that position and the position currently holding its intended source
// element.
//
// Assuming indices is a permutation of 0..len(indices)-1 and swap(i, j)
// exchanges elements i and j of the underlying collection(s), position p ends
// up holding the element that was originally at indices[p]. indices itself is
// not modified by this function.
func ReorderInPlace(swap func(i, j int), indices []int) {
	for pos := 0; pos < len(indices); pos++ {
		src := indices[pos]
		// A source below pos has already been swapped away by an earlier
		// step; follow the chain of indices until reaching a position that
		// has not been processed yet.
		for src < pos {
			src = indices[src]
		}
		swap(pos, src)
	}
}
33 changes: 33 additions & 0 deletions internal/testrandom/random.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package testrandom

import "math/rand/v2"

var Source = rand.New(rand.NewPCG(0xB0, 0xA4))

func Query() uint64 {
return Source.Uint64()
}

func Data(size int) []uint64 {
data := make([]uint64, size)
for i := range data {
data[i] = Source.Uint64()
}
return data
}

func Labels(size int) []int {
labels := make([]int, size)
for i := range labels {
labels[i] = int(Source.Uint32N(256))
}
return labels
}

func Values(size int) []float64 {
labels := make([]float64, size)
for i := range labels {
labels[i] = Source.Float64()
}
return labels
}
Loading

0 comments on commit 45196fd

Please sign in to comment.