Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(cmSketch test) and add better cmSketch test #325

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 87 additions & 6 deletions sketch_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package ristretto

import (
"math/rand"
"sort"
"strings"
"testing"

"github.com/stretchr/testify/require"
Expand All @@ -16,17 +19,95 @@ func TestSketch(t *testing.T) {
newCmSketch(0)
}

// Temporary comment. This test is fixed, kind of.
// It will now report when two or more rows are identical but that isn't what
// should be tested. What should be tested is that the rows end up having unique
// values from each other. If the rows have identical counters, but they are in
// different positions, that doesn't help; no reason to have multiple rows if
// the increment for one row always changes counters like to does for the next
// row. So the actual test that shows whether row counters are being incremented
// uniquely is the new one below: TestSketchRowUniqueness. There the counters in
// a row are sorted before being compared.
func TestSketchIncrement(t *testing.T) {
s := newCmSketch(16)
s.Increment(1)
s.Increment(5)
s.Increment(9)
pseudoRandomIncrements(s, anyseed, anycount)
for i := 0; i < cmDepth; i++ {
if s.rows[i].string() != s.rows[0].string() {
break
rowi := s.rows[i].string()
for j := 0; j < i; j++ {
rowj := s.rows[j].string()
require.False(t, rowi == rowj, "identical rows, bad hashing")
}
}
}

const anyseed = int64(990099)
const anycount = int(100) // Hopefully not large enough to max out a counter

func pseudoRandomIncrements(s *cmSketch, seed int64, count int) {
r := rand.New(rand.NewSource(anyseed))
for n := 0; n < count; n++ {
s.Increment(r.Uint64())
}
}

// Bad hashing increments because there is very little entropy
// between the values. This is used to test how well the sketch
// uses multiple rows when there is little entropy between the hashes
// being incremented with.
func badHashingIncrements(s *cmSketch, count int) {
for hashed := 0; hashed < count; hashed++ {
s.Increment(uint64(hashed + 1))
}
}

func TestSketchRowUniqueness(t *testing.T) {
// Test the row uniqueness twice.
// Once when the hashes being added are pretty random
// which we would normally expect.
// And once when the hashes added are not normal, maybe
// they are all low numbers for example.
t.Run("WithGoodHashing", func(t *testing.T) {
s := newCmSketch(16)
pseudoRandomIncrements(s, anyseed, anycount)
testSketchRowUniqueness1(t, s)
})
t.Run("WithBadHashing", func(t *testing.T) {
s := newCmSketch(16)
badHashingIncrements(s, anycount)
testSketchRowUniqueness1(t, s)
})
}

func testSketchRowUniqueness1(t *testing.T, s *cmSketch) {
// Perform test like the one above, TestSketchIncrement, but
// compare the rows counters, once the counters are sorted.
// If after several insertions, the rows have the same counters
// in them, the hashing scheme is likely not actually
// creating uniqueness between rows.

var unswiv [cmDepth]string
for i := 0; i < cmDepth; i++ {
unswiv[i] = s.rows[i].string()
}
// Now perform a kind of unswivel on each row, so counters are in ascending order.
for i := 0; i < cmDepth; i++ {
fields := strings.Split(unswiv[i], " ")
sort.Strings(fields)
unswiv[i] = strings.Join(fields, " ")
}
identical := 0
for i := 0; i < cmDepth; i++ {
t.Logf("s.rows[%d] %s, unswiv[%d] %s", i, s.rows[i].string(), i, unswiv[i])

for j := 0; j < i; j++ {
if unswiv[i] == unswiv[j] {
// Even one would be suspect. But count here so we can see how many rows look the same.
identical++
break // break out of j loop, knowing i looks like any earlier is enough, don't want to double count
}
}
require.False(t, i == cmDepth-1, "identical rows, bad seeding")
}
require.Zero(t, identical, "%d unswiveled rows looked like earlier unswiveled rows", identical)
}

func TestSketchEstimate(t *testing.T) {
Expand Down
Loading