Skip to content

Commit

Permalink
colblk: move Cockroach-specific tests to crdbtest
Browse files Browse the repository at this point in the history
  • Loading branch information
RaduBerinde committed Oct 22, 2024
1 parent 91e718b commit 5e1747f
Show file tree
Hide file tree
Showing 3 changed files with 313 additions and 326 deletions.
230 changes: 230 additions & 0 deletions internal/crdbtest/crdb_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,20 @@ package crdbtest

import (
"context"
"fmt"
"math/rand/v2"
"slices"
"testing"
"time"

"github.com/cockroachdb/crlib/crstrings"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/cache"
"github.com/cockroachdb/pebble/internal/sstableinternal"
"github.com/cockroachdb/pebble/objstorage"
"github.com/cockroachdb/pebble/sstable"
"github.com/cockroachdb/pebble/sstable/block"
"github.com/cockroachdb/pebble/sstable/colblk"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -140,3 +145,228 @@ func benchmarkRandSeekInSST(
// Stop the timer before any deferred cleanup.
b.StopTimer()
}

func BenchmarkCockroachDataBlockWriter(b *testing.B) {
for _, alphaLen := range []int{4, 8, 26} {
for _, lenSharedPct := range []float64{0.25, 0.5} {
for _, prefixLen := range []int{8, 32, 128} {
lenShared := int(float64(prefixLen) * lenSharedPct)
for _, valueLen := range []int{8, 128, 1024} {
keyConfig := KeyConfig{
PrefixAlphabetLen: alphaLen,
PrefixLen: prefixLen,
PrefixLenShared: lenShared,
PercentLogical: 0,
AvgKeysPerPrefix: 2,
BaseWallTime: uint64(time.Now().UnixNano()),
}
b.Run(fmt.Sprintf("%s,valueLen=%d", keyConfig, valueLen), func(b *testing.B) {
benchmarkCockroachDataBlockWriter(b, keyConfig, valueLen)
})
}
}
}
}
}

func benchmarkCockroachDataBlockWriter(b *testing.B, keyConfig KeyConfig, valueLen int) {
const targetBlockSize = 32 << 10
seed := uint64(time.Now().UnixNano())
rng := rand.New(rand.NewPCG(0, seed))
_, keys, values := generateDataBlock(rng, targetBlockSize, keyConfig, valueLen)

var w colblk.DataBlockEncoder
w.Init(&KeySchema)

b.ResetTimer()
for i := 0; i < b.N; i++ {
w.Reset()
var count int
for w.Size() < targetBlockSize {
ik := base.MakeInternalKey(keys[count], base.SeqNum(rng.Uint64N(uint64(base.SeqNumMax))), base.InternalKeyKindSet)
kcmp := w.KeyWriter.ComparePrev(ik.UserKey)
w.Add(ik, values[count], block.InPlaceValuePrefix(kcmp.PrefixEqual()), kcmp, false /* isObsolete */)
count++
}
_, _ = w.Finish(w.Rows(), w.Size())
}
}

func BenchmarkCockroachDataBlockIterFull(b *testing.B) {
for _, alphaLen := range []int{4, 8, 26} {
for _, lenSharedPct := range []float64{0.25, 0.5} {
for _, prefixLen := range []int{8, 32, 128} {
lenShared := int(float64(prefixLen) * lenSharedPct)
for _, avgKeysPerPrefix := range []int{1, 10, 100} {
for _, percentLogical := range []int{0, 50} {
for _, valueLen := range []int{8, 128, 1024} {
cfg := benchConfig{
KeyConfig: KeyConfig{
PrefixAlphabetLen: alphaLen,
PrefixLen: prefixLen,
PrefixLenShared: lenShared,
AvgKeysPerPrefix: avgKeysPerPrefix,
PercentLogical: percentLogical,
},
ValueLen: valueLen,
}
b.Run(cfg.String(), func(b *testing.B) {
benchmarkCockroachDataBlockIter(b, cfg, block.IterTransforms{})
})
}
}
}
}
}
}
}

var shortBenchConfigs = []benchConfig{
{
KeyConfig: KeyConfig{
PrefixAlphabetLen: 8,
PrefixLen: 8,
PrefixLenShared: 4,
AvgKeysPerPrefix: 4,
PercentLogical: 10,
},
ValueLen: 8,
},
{
KeyConfig: KeyConfig{
PrefixAlphabetLen: 8,
PrefixLen: 128,
PrefixLenShared: 64,
AvgKeysPerPrefix: 4,
PercentLogical: 10,
},
ValueLen: 128,
},
}

func BenchmarkCockroachDataBlockIterShort(b *testing.B) {
for _, cfg := range shortBenchConfigs {
b.Run(cfg.String(), func(b *testing.B) {
benchmarkCockroachDataBlockIter(b, cfg, block.IterTransforms{})
})
}
}

func BenchmarkCockroachDataBlockIterTransforms(b *testing.B) {
transforms := []struct {
description string
transforms block.IterTransforms
}{
{},
{
description: "SynthSeqNum",
transforms: block.IterTransforms{
SyntheticSeqNum: 1234,
},
},
{
description: "HideObsolete",
transforms: block.IterTransforms{
HideObsoletePoints: true,
},
},
{
description: "SyntheticPrefix",
transforms: block.IterTransforms{
SyntheticPrefixAndSuffix: block.MakeSyntheticPrefixAndSuffix([]byte("prefix_"), nil),
},
},
{
description: "SyntheticSuffix",
transforms: block.IterTransforms{
SyntheticPrefixAndSuffix: block.MakeSyntheticPrefixAndSuffix(
nil,
EncodeTimestamp(make([]byte, 0, 20), 1_000_000_000_000, 0)[1:],
),
},
},
}
for _, cfg := range shortBenchConfigs {
for _, t := range transforms {
name := cfg.String() + crstrings.If(t.description != "", ","+t.description)
b.Run(name, func(b *testing.B) {
benchmarkCockroachDataBlockIter(b, cfg, t.transforms)
})
}
}
}

type benchConfig struct {
KeyConfig
ValueLen int
}

func (cfg benchConfig) String() string {
return fmt.Sprintf("%s,ValueLen=%d", cfg.KeyConfig, cfg.ValueLen)
}

func benchmarkCockroachDataBlockIter(
b *testing.B, cfg benchConfig, transforms block.IterTransforms,
) {
const targetBlockSize = 32 << 10
seed := uint64(time.Now().UnixNano())
rng := rand.New(rand.NewPCG(0, seed))
cfg.BaseWallTime = seed

serializedBlock, keys, _ := generateDataBlock(rng, targetBlockSize, cfg.KeyConfig, cfg.ValueLen)

var decoder colblk.DataBlockDecoder
var it colblk.DataBlockIter
it.InitOnce(&KeySchema, Compare, Split, getLazyValuer(func([]byte) base.LazyValue {
return base.LazyValue{ValueOrHandle: []byte("mock external value")}
}))
decoder.Init(&KeySchema, serializedBlock)
if err := it.Init(&decoder, transforms); err != nil {
b.Fatal(err)
}
avgRowSize := float64(len(serializedBlock)) / float64(len(keys))

if transforms.HasSyntheticPrefix() {
for i := range keys {
keys[i] = slices.Concat(transforms.SyntheticPrefix(), keys[i])
}
}

b.Run("Next", func(b *testing.B) {
kv := it.First()
b.ResetTimer()
for i := 0; i < b.N; i++ {
if kv == nil {
kv = it.First()
} else {
kv = it.Next()
}
}
b.StopTimer()
b.ReportMetric(avgRowSize, "bytes/row")
})
for _, queryLatest := range []bool{false, true} {
b.Run("SeekGE"+crstrings.If(queryLatest, "Latest"), func(b *testing.B) {
rng := rand.New(rand.NewPCG(1, seed))
const numQueryKeys = 65536
baseWallTime := cfg.BaseWallTime
if queryLatest {
baseWallTime += 24 * uint64(time.Hour)
}
queryKeys := RandomQueryKeys(rng, numQueryKeys, keys, baseWallTime)
b.ResetTimer()
for i := 0; i < b.N; i++ {
k := queryKeys[i%numQueryKeys]
if kv := it.SeekGE(k, base.SeekGEFlagsNone); kv == nil {
// SeekGE should always end up finding a key if we are querying for the
// latest version of each prefix and we are not hiding any points.
if queryLatest && !transforms.HideObsoletePoints {
b.Fatalf("%q not found", k)
}
}
}
b.StopTimer()
b.ReportMetric(avgRowSize, "bytes/row")
})
}
}
83 changes: 83 additions & 0 deletions internal/crdbtest/crdb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ import (
"github.com/cockroachdb/datadriven"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/testutils"
"github.com/cockroachdb/pebble/sstable/block"
"github.com/cockroachdb/pebble/sstable/colblk"
"github.com/stretchr/testify/require"
)

func TestComparer(t *testing.T) {
Expand Down Expand Up @@ -122,3 +125,83 @@ func TestRandKeys(t *testing.T) {
return buf.String()
})
}

func TestCockroachDataBlock(t *testing.T) {
const targetBlockSize = 32 << 10
const valueLen = 100
seed := uint64(time.Now().UnixNano())
t.Logf("seed: %d", seed)
rng := rand.New(rand.NewPCG(0, seed))
serializedBlock, keys, values := generateDataBlock(rng, targetBlockSize, KeyConfig{
PrefixAlphabetLen: 26,
PrefixLen: 12,
PercentLogical: rng.IntN(25),
AvgKeysPerPrefix: 2,
BaseWallTime: seed,
}, valueLen)

var decoder colblk.DataBlockDecoder
var it colblk.DataBlockIter
it.InitOnce(&KeySchema, Compare, Split, getLazyValuer(func([]byte) base.LazyValue {
return base.LazyValue{ValueOrHandle: []byte("mock external value")}
}))
decoder.Init(&KeySchema, serializedBlock)
if err := it.Init(&decoder, block.IterTransforms{}); err != nil {
t.Fatal(err)
}

t.Run("Next", func(t *testing.T) {
// Scan the block using Next and ensure that all the keys values match.
i := 0
for kv := it.First(); kv != nil; i, kv = i+1, it.Next() {
if !bytes.Equal(kv.K.UserKey, keys[i]) {
t.Fatalf("expected %q, but found %q", keys[i], kv.K.UserKey)
}
if !bytes.Equal(kv.V.InPlaceValue(), values[i]) {
t.Fatalf("expected %x, but found %x", values[i], kv.V.InPlaceValue())
}
}
require.Equal(t, len(keys), i)
})
t.Run("SeekGE", func(t *testing.T) {
rng := rand.New(rand.NewPCG(0, seed))
for _, i := range rng.Perm(len(keys)) {
kv := it.SeekGE(keys[i], base.SeekGEFlagsNone)
if kv == nil {
t.Fatalf("%q not found", keys[i])
}
if !bytes.Equal(kv.V.InPlaceValue(), values[i]) {
t.Fatalf(
"expected:\n %x\nfound:\n %x\nquery key:\n %x\nreturned key:\n %x",
values[i], kv.V.InPlaceValue(), keys[i], kv.K.UserKey)
}
}
})
}

// generateDataBlock writes out a random cockroach data block using the given
// parameters. Returns the serialized block data and the keys and values
// written.
func generateDataBlock(
rng *rand.Rand, targetBlockSize int, cfg KeyConfig, valueLen int,
) (data []byte, keys [][]byte, values [][]byte) {
keys, values = RandomKVs(rng, targetBlockSize/valueLen, cfg, valueLen)

var w colblk.DataBlockEncoder
w.Init(&KeySchema)
count := 0
for w.Size() < targetBlockSize {
ik := base.MakeInternalKey(keys[count], base.SeqNum(rng.Uint64N(uint64(base.SeqNumMax))), base.InternalKeyKindSet)
kcmp := w.KeyWriter.ComparePrev(ik.UserKey)
w.Add(ik, values[count], block.InPlaceValuePrefix(kcmp.PrefixEqual()), kcmp, false /* isObsolete */)
count++
}
data, _ = w.Finish(w.Rows(), w.Size())
return data, keys[:count], values[:count]
}

type getLazyValuer func([]byte) base.LazyValue

func (g getLazyValuer) GetLazyValueForPrefixAndValueHandle(handle []byte) base.LazyValue {
return g(handle)
}
Loading

0 comments on commit 5e1747f

Please sign in to comment.