Skip to content

Commit

Permalink
clean up interfaces, update docs
Browse files Browse the repository at this point in the history
  • Loading branch information
sgreben committed Oct 11, 2024
1 parent c7f45e1 commit 72a5bed
Show file tree
Hide file tree
Showing 14 changed files with 233 additions and 111 deletions.
94 changes: 58 additions & 36 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,24 @@ The sub-package [`lsh`](https://pkg.go.dev/github.com/keilerkonzept/bitknn/lsh)

## Usage

There are just three methods you'll usually need:

- **Fit** *(data, labels, [options])*: Create a model from a dataset.

Variants: [`bitknn.Fit`](https://pkg.go.dev/github.com/keilerkonzept/bitknn#Fit), [`bitknn.FitWide`](https://pkg.go.dev/github.com/keilerkonzept/bitknn#FitWide), [`lsh.Fit`](https://pkg.go.dev/github.com/keilerkonzept/bitknn/lsh#Fit), [`lsh.FitWide`](https://pkg.go.dev/github.com/keilerkonzept/bitknn/lsh#FitWide)
- **Find** *(k, point)*: Given a point, return the distances and indices of its *k* nearest neighbors.

Variants: [`bitknn.Model.Find`](https://pkg.go.dev/github.com/keilerkonzept/bitknn#Model.Find), [`bitknn.WideModel.Find`](https://pkg.go.dev/github.com/keilerkonzept/bitknn#WideModel.Find), [`lsh.Model.Find`](https://pkg.go.dev/github.com/keilerkonzept/bitknn/lsh#Model.Find), [`lsh.WideModel.Find`](https://pkg.go.dev/github.com/keilerkonzept/bitknn/lsh#WideModel.Find)

- **Predict** *(k, point, votes)*: Predict the label for a given point based on its nearest neighbors, and write the label votes into the provided vote counter.

Variants: [`bitknn.Model.Predict`](https://pkg.go.dev/github.com/keilerkonzept/bitknn#Model.Predict), [`bitknn.WideModel.Predict`](https://pkg.go.dev/github.com/keilerkonzept/bitknn#WideModel.Predict), [`lsh.Model.Predict`](https://pkg.go.dev/github.com/keilerkonzept/bitknn/lsh#Model.Predict), [`lsh.WideModel.Predict`](https://pkg.go.dev/github.com/keilerkonzept/bitknn/lsh#WideModel.Predict)

Each of the above methods is available on each model type. There are four model types in total:

- **Exact k-NN** models: [`bitknn.Model`](https://pkg.go.dev/github.com/keilerkonzept/bitknn#Model) (64 bits), [`bitknn.WideModel`](https://pkg.go.dev/github.com/keilerkonzept/bitknn#WideModel) (*N* * 64 bits)
- **Approximate (ANN)** models: [`lsh.Model`](https://pkg.go.dev/github.com/keilerkonzept/bitknn/lsh#Model) (64 bits), [`lsh.WideModel`](https://pkg.go.dev/github.com/keilerkonzept/bitknn/lsh#WideModel) (*N* * 64 bits)

### Basic usage

```go
Expand All @@ -53,14 +71,16 @@ func main() {
votes := make([]float64, 2)

k := 2
model.Predict1(k, 0b101011, bitknn.VoteSlice(votes))
model.Predict(k, 0b101011, bitknn.VoteSlice(votes))
// or, just return the nearest neighbors' distances and indices:
// distances,indices := model.Find(k, 0b101011)

fmt.Println("Votes:", bitknn.VoteSlice(votes))

// you can also use a map for the votes.
// this is good if you have a very large number of different labels:
votesMap := make(map[int]float64)
model.Predict1(k, 0b101011, bitknn.VoteMap(votesMap))
model.Predict(k, 0b101011, bitknn.VoteMap(votesMap))
fmt.Println("Votes for 0:", votesMap[0])
}
```
Expand Down Expand Up @@ -96,13 +116,15 @@ func main() {
votes := make([]float64, 2)

k := 2
model.Predict1(k, 0b101011, bitknn.VoteSlice(votes))
model.Predict(k, 0b101011, bitknn.VoteSlice(votes))
// or, just return the nearest neighbors' distances and indices:
// distances,indices := model.Find(k, 0b101011)

fmt.Println("Votes:", bitknn.VoteSlice(votes))

// you can also use a map for the votes
votesMap := make(map[int]float64)
model.Predict1(k, 0b101011, bitknn.VoteMap(votesMap))
model.Predict(k, 0b101011, bitknn.VoteMap(votesMap))
fmt.Println("Votes for 0:", votesMap[0])
}
```
Expand Down Expand Up @@ -163,7 +185,7 @@ func main() {

k := 2
query := pack.String("fob")
model.Predict1(k, query, bitknn.VoteSlice(votes))
model.Predict(k, query, bitknn.VoteSlice(votes))

fmt.Println("Votes:", bitknn.VoteSlice(votes))
}
Expand All @@ -188,37 +210,37 @@ pkg: github.com/keilerkonzept/bitknn
cpu: Apple M1 Pro
```

| Op | N | k | Distance weighting | Vote values | sec / op | B/op | allocs/op |
|------------|---------|-----|--------------------|-------------|--------------|------|-----------|
| `Predict1` | 100 | 3 | | | 138.7n ± 22% | 0 | 0 |
| `Predict1` | 100 | 3 | | ☑️ | 127.8n ± 11% | 0 | 0 |
| `Predict1` | 100 | 3 | linear | | 137.0n ± 11% | 0 | 0 |
| `Predict1` | 100 | 3 | linear | ☑️ | 136.7n ± 10% | 0 | 0 |
| `Predict1` | 100 | 3 | quadratic | | 137.2n ± 7% | 0 | 0 |
| `Predict1` | 100 | 3 | quadratic | ☑️ | 130.4n ± 4% | 0 | 0 |
| `Predict1` | 100 | 3 | custom | | 140.6n ± 7% | 0 | 0 |
| `Predict1` | 100 | 3 | custom | ☑️ | 134.9n ± 13% | 0 | 0 |
| `Predict1` | 100 | 10 | | | 307.4n ± 11% | 0 | 0 |
| `Predict1` | 100 | 10 | | ☑️ | 297.8n ± 15% | 0 | 0 |
| `Predict1` | 100 | 10 | linear | | 288.2n ± 18% | 0 | 0 |
| `Predict1` | 100 | 10 | linear | ☑️ | 302.9n ± 14% | 0 | 0 |
| `Predict1` | 100 | 10 | quadratic | | 283.7n ± 15% | 0 | 0 |
| `Predict1` | 100 | 10 | quadratic | ☑️ | 290.0n ± 13% | 0 | 0 |
| `Predict1` | 100 | 10 | custom | | 313.1n ± 17% | 0 | 0 |
| `Predict1` | 100 | 10 | custom | ☑️ | 316.2n ± 11% | 0 | 0 |
| `Predict1` | 100 | 100 | | ☑️ | 545.4n ± 4% | 0 | 0 |
| `Predict1` | 100 | 100 | linear | | 542.4n ± 4% | 0 | 0 |
| `Predict1` | 100 | 100 | linear | ☑️ | 577.5n ± 4% | 0 | 0 |
| `Predict1` | 100 | 100 | quadratic | | 553.1n ± 3% | 0 | 0 |
| `Predict1` | 100 | 100 | quadratic | ☑️ | 582.4n ± 6% | 0 | 0 |
| `Predict1` | 100 | 100 | custom | | 683.8n ± 4% | 0 | 0 |
| `Predict1` | 100 | 100 | custom | ☑️ | 748.5n ± 2% | 0 | 0 |
| `Predict1` | 1000 | 3 | | | 669.5n ± 6% | 0 | 0 |
| `Predict1` | 1000 | 10 | | | 930.3n ± 7% | 0 | 0 |
| `Predict1` | 1000 | 100 | | | 3.762µ ± 5% | 0 | 0 |
| `Predict1` | 1000000 | 3 | | | 532.1µ ± 1% | 0 | 0 |
| `Predict1` | 1000000 | 10 | | | 534.5µ ± 1% | 0 | 0 |
| `Predict1` | 1000000 | 100 | | | 551.7µ ± 1% | 0 | 0 |
| Op | N | k | Distance weighting | Vote values | sec / op | B/op | allocs/op |
|-----------|---------|-----|--------------------|-------------|--------------|------|-----------|
| `Predict` | 100 | 3 | | | 138.7n ± 22% | 0 | 0 |
| `Predict` | 100 | 3 | | ☑️ | 127.8n ± 11% | 0 | 0 |
| `Predict` | 100 | 3 | linear | | 137.0n ± 11% | 0 | 0 |
| `Predict` | 100 | 3 | linear | ☑️ | 136.7n ± 10% | 0 | 0 |
| `Predict` | 100 | 3 | quadratic | | 137.2n ± 7% | 0 | 0 |
| `Predict` | 100 | 3 | quadratic | ☑️ | 130.4n ± 4% | 0 | 0 |
| `Predict` | 100 | 3 | custom | | 140.6n ± 7% | 0 | 0 |
| `Predict` | 100 | 3 | custom | ☑️ | 134.9n ± 13% | 0 | 0 |
| `Predict` | 100 | 10 | | | 307.4n ± 11% | 0 | 0 |
| `Predict` | 100 | 10 | | ☑️ | 297.8n ± 15% | 0 | 0 |
| `Predict` | 100 | 10 | linear | | 288.2n ± 18% | 0 | 0 |
| `Predict` | 100 | 10 | linear | ☑️ | 302.9n ± 14% | 0 | 0 |
| `Predict` | 100 | 10 | quadratic | | 283.7n ± 15% | 0 | 0 |
| `Predict` | 100 | 10 | quadratic | ☑️ | 290.0n ± 13% | 0 | 0 |
| `Predict` | 100 | 10 | custom | | 313.1n ± 17% | 0 | 0 |
| `Predict` | 100 | 10 | custom | ☑️ | 316.2n ± 11% | 0 | 0 |
| `Predict` | 100 | 100 | | ☑️ | 545.4n ± 4% | 0 | 0 |
| `Predict` | 100 | 100 | linear | | 542.4n ± 4% | 0 | 0 |
| `Predict` | 100 | 100 | linear | ☑️ | 577.5n ± 4% | 0 | 0 |
| `Predict` | 100 | 100 | quadratic | | 553.1n ± 3% | 0 | 0 |
| `Predict` | 100 | 100 | quadratic | ☑️ | 582.4n ± 6% | 0 | 0 |
| `Predict` | 100 | 100 | custom | | 683.8n ± 4% | 0 | 0 |
| `Predict` | 100 | 100 | custom | ☑️ | 748.5n ± 2% | 0 | 0 |
| `Predict` | 1000 | 3 | | | 669.5n ± 6% | 0 | 0 |
| `Predict` | 1000 | 10 | | | 930.3n ± 7% | 0 | 0 |
| `Predict` | 1000 | 100 | | | 3.762µ ± 5% | 0 | 0 |
| `Predict` | 1000000 | 3 | | | 532.1µ ± 1% | 0 | 0 |
| `Predict` | 1000000 | 10 | | | 534.5µ ± 1% | 0 | 0 |
| `Predict` | 1000000 | 100 | | | 551.7µ ± 1% | 0 | 0 |

## License

Expand Down
8 changes: 5 additions & 3 deletions example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@ func Example() {
votes := make([]float64, 2)

k := 2
model.Predict1(k, 0b101011, bitknn.VoteSlice(votes))
model.Predict(k, 0b101011, bitknn.VoteSlice(votes))
// or, just return the nearest neighbors' distances and indices:
// distances,indices := model.Find(k, 0b101011)

fmt.Println("Votes:", bitknn.VoteSlice(votes))

// you can also use a map for the votes.
// this is good if you have a very large number of different labels:
votesMap := make(map[int]float64)
model.Predict1(k, 0b101011, bitknn.VoteMap(votesMap))
model.Predict(k, 0b101011, bitknn.VoteMap(votesMap))
fmt.Println("Votes for 0:", votesMap[0])
// Output:
// Votes: [0.5 0.25]
Expand All @@ -52,7 +54,7 @@ func ExampleFitWide() {

k := 2
query := pack.String("fob")
model.Predict1(k, query, bitknn.VoteSlice(votes))
model.Predict(k, query, bitknn.VoteSlice(votes))

fmt.Println("Votes:", bitknn.VoteSlice(votes))

Expand Down
4 changes: 2 additions & 2 deletions lsh/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ func Example() {
votes := make([]float64, 2)

k := 2
model.Predict1(k, 0b101011, bitknn.VoteSlice(votes))
model.Predict(k, 0b101011, bitknn.VoteSlice(votes))

fmt.Println("Votes:", bitknn.VoteSlice(votes))

// you can also use a map for the votes
votesMap := make(map[int]float64)
model.Predict1(k, 0b101011, bitknn.VoteMap(votesMap))
model.Predict(k, 0b101011, bitknn.VoteMap(votesMap))
fmt.Println("Votes for 0:", votesMap[0])
// Output:
// Votes: [0.5 0.25]
Expand Down
32 changes: 25 additions & 7 deletions lsh/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,24 +72,42 @@ func Fit(data []uint64, labels []int, hash Hash, opts ...bitknn.Option) *Model {
}
}

// Predict1 predicts the label for a single input using the LSH model.
func (me *Model) Predict1(k int, x uint64, votes bitknn.VoteCounter) int {
// Finds the nearest neighbors of the given point.
// Writes their distances and indices in the dataset into the pre-allocated slices.
// Returns the distance and index slices, truncated to the actual number of neighbors found.
func (me *Model) Find(k int, x uint64) ([]int, []int) {
me.PreallocateHeap(k)
return me.Predict1Into(k, x, votes, me.HeapBucketDistances, me.HeapBucketIDs, me.HeapDistances, me.HeapIndices)
return me.FindInto(k, x, me.HeapBucketDistances, me.HeapBucketIDs, me.HeapDistances, me.HeapIndices)
}

// FindInto finds the nearest neighbors of the given point x.
// The point is hashed with the model's Hash, and the search itself is delegated to Nearest.
// The neighbors' distances and dataset indices are written into the provided slices.
// The slices should be pre-allocated to length k+1.
// Returns the distance and index slices, truncated to the actual number of neighbors found.
func (me *Model) FindInto(k int, x uint64, bucketDistances []int, bucketIDs []uint64, distances []int, indices []int) ([]int, []int) {
	hashed := me.Hash.Hash1(x)
	// Nearest reports how many neighbors it found; its second result is not needed here.
	found, _ := Nearest(me.Data, me.BucketIDs, me.Buckets, k, hashed, x, bucketDistances, bucketIDs, distances, indices)
	return distances[:found], indices[:found]
}

// Predict predicts the label for a single input using the LSH model.
// It calls PreallocateHeap(k) and then delegates to PredictInto, passing the
// model's own heap slices, and returns PredictInto's result.
func (me *Model) Predict(k int, x uint64, votes bitknn.VoteCounter) int {
	me.PreallocateHeap(k)
	// Read the heap slices only after PreallocateHeap has run.
	bucketDistances, bucketIDs := me.HeapBucketDistances, me.HeapBucketIDs
	distances, indices := me.HeapDistances, me.HeapIndices
	return me.PredictInto(k, x, votes, bucketDistances, bucketIDs, distances, indices)
}

// Predicts the label of a single input point. Each call allocates four new slices of length [k]+1 for the neighbor heaps.
func (me *Model) Predict1Alloc(k int, x uint64, votes bitknn.VoteCounter) int {
func (me *Model) PredictAlloc(k int, x uint64, votes bitknn.VoteCounter) int {
bucketDistances := make([]int, k+1)
bucketIDs := make([]uint64, k+1)
distances := make([]int, k+1)
indices := make([]int, k+1)

return me.Predict1Into(k, x, votes, bucketDistances, bucketIDs, distances, indices)
return me.PredictInto(k, x, votes, bucketDistances, bucketIDs, distances, indices)
}

// Predict1Into predicts the label for a single input using the given slices (of length [k]+1 each) for the neighbor heaps.
func (me *Model) Predict1Into(k int, x uint64, votes bitknn.VoteCounter, bucketDistances []int, bucketIDs []uint64, distances []int, indices []int) int {
// PredictInto predicts the label for a single input using the given slices (of length [k]+1 each) for the neighbor heaps.
func (me *Model) PredictInto(k int, x uint64, votes bitknn.VoteCounter, bucketDistances []int, bucketIDs []uint64, distances []int, indices []int) int {
xp := me.Hash.Hash1(x)
k, n := Nearest(me.Data, me.BucketIDs, me.Buckets, k, xp, x, bucketDistances, bucketIDs, distances, indices)
me.Vote(k, distances, indices, votes)
Expand Down
4 changes: 2 additions & 2 deletions lsh/model_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/keilerkonzept/bitknn/lsh"
)

func Benchmark_Model_Predict1(b *testing.B) {
func Benchmark_Model_Predict(b *testing.B) {
type bench struct {
hashes []lsh.Hash
dataSize []int
Expand All @@ -34,7 +34,7 @@ func Benchmark_Model_Predict1(b *testing.B) {
model.PreallocateHeap(k)
b.ResetTimer()
for n := 0; n < b.N; n++ {
model.Predict1(k, query, bitknn.DiscardVotes)
model.Predict(k, query, bitknn.DiscardVotes)
}
})
}
Expand Down
22 changes: 17 additions & 5 deletions lsh/model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,23 +87,35 @@ func Test_Model_NoHash_IsExact(t *testing.T) {
knn.PreallocateHeap(k)
ann.PreallocateHeap(k)
for _, q := range queries {
knn.Predict1(k, q, bitknn.VoteSlice(knnVotes))

ann.Predict1(k, q, bitknn.VoteSlice(annVotes))
knn.Predict(k, q, bitknn.VoteSlice(knnVotes))
ann.Predict(k, q, bitknn.VoteSlice(annVotes))
slices.Sort(knn.HeapDistances[:k])
slices.Sort(ann.HeapDistances[:k])
if !reflect.DeepEqual(knn.HeapDistances[:k], ann.HeapDistances[:k]) {
t.Fatal("NoHash ANN should result in the same distances for the nearest neighbors: ", knn.HeapDistances[:k], ann.HeapDistances[:k], knn.HeapIndices[:k], ann.HeapIndices[:k])
}

ann0.Predict1Alloc(k, q, bitknn.VoteSlice(annVotes))
kd, ki := knn.Find(k, q)
ad, ai := ann.Find(k, q)
slices.Sort(kd)
slices.Sort(ad)
if !reflect.DeepEqual(kd, ad) {
t.Fatal("NoHash ANN should result in the same distances for the nearest neighbors: ", kd, ad)
}
slices.Sort(ki)
slices.Sort(ai)
if !reflect.DeepEqual(ki, ai) {
t.Fatal("NoHash ANN should result in the same indices for the nearest neighbors: ", ki, ai)
}

ann0.PredictAlloc(k, q, bitknn.VoteSlice(annVotes))
for i, vk := range knnVotes {
va := annVotes[i]
if math.Abs(vk-va) > eps {
t.Fatalf("ANN: %s: %v: %v %v", pair.name, q, knnVotes, annVotes)
}
}
ann0.Predict1(k, q, bitknn.VoteSlice(annVotes))
ann0.Predict(k, q, bitknn.VoteSlice(annVotes))
for i, vk := range knnVotes {
va := annVotes[i]
if math.Abs(vk-va) > eps {
Expand Down
32 changes: 25 additions & 7 deletions lsh/model_wide.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,24 +65,42 @@ func FitWide(data [][]uint64, labels []int, hash HashWide, opts ...bitknn.Option
}
}

// Predict1 predicts the label for a single input using the LSH model.
func (me *WideModel) Predict1(k int, x []uint64, votes bitknn.VoteCounter) int {
// Finds the nearest neighbors of the given point.
// Writes their distances and indices in the dataset into the pre-allocated slices.
// Returns the distance and index slices, truncated to the actual number of neighbors found.
func (me *WideModel) Find(k int, x []uint64) ([]int, []int) {
me.PreallocateHeap(k)
return me.Predict1Into(k, x, votes, me.HeapBucketDistances, me.HeapBucketIDs, me.Narrow.HeapDistances, me.Narrow.HeapIndices)
return me.FindInto(k, x, me.HeapBucketDistances, me.HeapBucketIDs, me.Narrow.HeapDistances, me.Narrow.HeapIndices)
}

// FindInto finds the nearest neighbors of the given wide point x.
// The point is hashed with the model's Hash, and the search itself is delegated to NearestWide.
// The neighbors' distances and dataset indices are written into the provided slices.
// The slices should be pre-allocated to length k+1.
// Returns the distance and index slices, truncated to the actual number of neighbors found.
func (me *WideModel) FindInto(k int, x []uint64, bucketDistances []int, bucketIDs []uint64, distances []int, indices []int) ([]int, []int) {
	hashed := me.Hash.Hash1Wide(x)
	// NearestWide reports how many neighbors it found; its second result is not needed here.
	found, _ := NearestWide(me.WideData, me.BucketIDs, me.Buckets, k, hashed, x, bucketDistances, bucketIDs, distances, indices)
	return distances[:found], indices[:found]
}

// Predict predicts the label for a single input using the LSH model.
// It calls PreallocateHeap(k) and then delegates to PredictInto, passing the
// model's bucket heaps and the Narrow model's distance/index heaps, and
// returns PredictInto's result.
func (me *WideModel) Predict(k int, x []uint64, votes bitknn.VoteCounter) int {
	me.PreallocateHeap(k)
	// Read the heap slices only after PreallocateHeap has run.
	bucketDistances, bucketIDs := me.HeapBucketDistances, me.HeapBucketIDs
	distances, indices := me.Narrow.HeapDistances, me.Narrow.HeapIndices
	return me.PredictInto(k, x, votes, bucketDistances, bucketIDs, distances, indices)
}

// Predicts the label of a single input point. Each call allocates four new slices of length [k]+1 for the neighbor heaps.
func (me *WideModel) Predict1Alloc(k int, x []uint64, votes bitknn.VoteCounter) int {
func (me *WideModel) PredictAlloc(k int, x []uint64, votes bitknn.VoteCounter) int {
bucketDistances := make([]int, k+1)
bucketIDs := make([]uint64, k+1)
distances := make([]int, k+1)
indices := make([]int, k+1)

return me.Predict1Into(k, x, votes, bucketDistances, bucketIDs, distances, indices)
return me.PredictInto(k, x, votes, bucketDistances, bucketIDs, distances, indices)
}

// Predict1Into predicts the label for a single input using the given slices (of length [k]+1 each) for the neighbor heaps.
func (me *WideModel) Predict1Into(k int, x []uint64, votes bitknn.VoteCounter, bucketDistances []int, bucketIDs []uint64, distances []int, indices []int) int {
// PredictInto predicts the label for a single input using the given slices (of length [k]+1 each) for the neighbor heaps.
func (me *WideModel) PredictInto(k int, x []uint64, votes bitknn.VoteCounter, bucketDistances []int, bucketIDs []uint64, distances []int, indices []int) int {
xp := me.Hash.Hash1Wide(x)
k0, _ := NearestWide(me.WideData, me.BucketIDs, me.Buckets, k, xp, x, bucketDistances, bucketIDs, distances, indices)
me.WideModel.Narrow.Vote(k0, distances, indices, votes)
Expand Down
Loading

0 comments on commit 72a5bed

Please sign in to comment.