diff --git a/internal/compact/testdata/split_and_encode_span b/internal/compact/testdata/split_and_encode_span
index d92f9dc8d2..eb3a1e9dd1 100644
--- a/internal/compact/testdata/split_and_encode_span
+++ b/internal/compact/testdata/split_and_encode_span
@@ -32,7 +32,7 @@ Encoded: b-c:{(#9,RANGEKEYUNSET,@3) (#8,RANGEKEYSET,@3,foo5) (#4,RANGEKEYSET,@
 Remaining: .
 
 set
-a-c:{(#9,RANGEDEL) (#8,RANGEDEL) (#4,RANGEDEL))
+a-c:{(#9,RANGEDEL) (#8,RANGEDEL) (#4,RANGEDEL)}
 ----
 
 encode up-to=b
diff --git a/internal/keyspan/span.go b/internal/keyspan/span.go
index 12a94dcb2b..335b10ed3d 100644
--- a/internal/keyspan/span.go
+++ b/internal/keyspan/span.go
@@ -491,9 +491,12 @@ func ParseSpan(input string) Span {
 	// Each of the remaining parts represents a single Key.
 	s.Keys = make([]Key, 0, len(parts)-2)
 	for _, p := range parts[2:] {
+		if len(p) >= 2 && p[0] == '(' && p[len(p)-1] == ')' {
+			p = p[1 : len(p)-1]
+		}
 		keyFields := strings.FieldsFunc(p, func(r rune) bool {
 			switch r {
-			case '#', ',', '(', ')':
+			case '#', ',':
 				return true
 			default:
 				return unicode.IsSpace(r)
diff --git a/sstable/rowblk/rowblk_fragment_iter_test.go b/sstable/rowblk/rowblk_fragment_iter_test.go
new file mode 100644
index 0000000000..3d1f4fde4a
--- /dev/null
+++ b/sstable/rowblk/rowblk_fragment_iter_test.go
@@ -0,0 +1,132 @@
+// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
+// of this source code is governed by a BSD-style license that can be found in
+// the LICENSE file.
+
+package rowblk
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/cockroachdb/datadriven"
+	"github.com/cockroachdb/pebble/internal/base"
+	"github.com/cockroachdb/pebble/internal/cache"
+	"github.com/cockroachdb/pebble/internal/keyspan"
+	"github.com/cockroachdb/pebble/internal/rangedel"
+	"github.com/cockroachdb/pebble/internal/rangekey"
+	"github.com/cockroachdb/pebble/sstable/block"
+	"github.com/stretchr/testify/require"
+)
+
+// TestBlockFragmentIterator is a datadriven test of NewFragmentIter. The
+// "build" command fragments the input spans, encodes them into a block and
+// stores that block in a cache; the "iter" command opens a fragment iterator
+// over the cached block and prints the span returned by each positioning op.
+func TestBlockFragmentIterator(t *testing.T) {
+	comparer := base.DefaultComparer
+	var cacheVal *cache.Value
+	c := cache.New(1024)
+	defer func() {
+		c.Unref()
+		if cacheVal != nil {
+			cache.Free(cacheVal)
+		}
+	}()
+
+	datadriven.RunTest(t, "testdata/rowblk_fragment_iter", func(t *testing.T, d *datadriven.TestData) string {
+		var buf strings.Builder
+		switch d.Cmd {
+		case "build":
+			var spans []keyspan.Span
+			fragmenter := keyspan.Fragmenter{
+				Cmp:    comparer.Compare,
+				Format: comparer.FormatKey,
+				Emit: func(s keyspan.Span) {
+					spans = append(spans, s)
+				},
+			}
+			for _, l := range strings.Split(d.Input, "\n") {
+				if l == "" {
+					continue
+				}
+				span := keyspan.ParseSpan(l)
+				fragmenter.Add(span)
+			}
+			fragmenter.Finish()
+			// Range del or range key blocks always use restart interval 1.
+			w := Writer{RestartInterval: 1}
+			emitFn := func(k base.InternalKey, v []byte) error {
+				w.Add(k, v)
+				return nil
+			}
+			for _, s := range spans {
+				if s.Keys[0].Kind() == base.InternalKeyKindRangeDelete {
+					rangedel.Encode(&s, emitFn)
+				} else {
+					rangekey.Encode(&s, emitFn)
+				}
+			}
+			blockData := w.Finish()
+
+			// Swap the freshly built block into the cache before releasing the
+			// value from the previous "build", if any.
+			oldCacheVal := cacheVal
+			cacheVal = cache.Alloc(len(blockData))
+			copy(cacheVal.Buf(), blockData)
+			c.Set(1, 0, 0, cacheVal)
+			if oldCacheVal != nil {
+				cache.Free(oldCacheVal)
+			}
+
+			for _, s := range spans {
+				buf.WriteString(s.String() + "\n")
+			}
+
+		case "iter":
+			var transforms block.FragmentIterTransforms
+			transforms.ElideSameSeqNum = d.HasArg("elide-same-seq-num")
+			var seqNum uint64
+			d.MaybeScanArgs(t, "synthetic-seq-num", &seqNum)
+			transforms.SyntheticSeqNum = block.SyntheticSeqNum(seqNum)
+
+			blockHandle := block.CacheBufferHandle(c.Get(1, 0, 0))
+			i, err := NewFragmentIter(comparer.Compare, comparer.Split, blockHandle, transforms)
+			require.NoError(t, err)
+			// Check the constructor's error before deferring Close, so a failed
+			// construction never results in Close being called on the iterator.
+			defer i.Close()
+
+			for _, l := range strings.Split(d.Input, "\n") {
+				if l == "" {
+					continue
+				}
+				var span *keyspan.Span
+				var err error
+				fields := strings.Fields(l)
+				switch fields[0] {
+				case "first":
+					span, err = i.First()
+				case "last":
+					span, err = i.Last()
+				case "next":
+					span, err = i.Next()
+				case "prev":
+					span, err = i.Prev()
+				case "seek-ge":
+					span, err = i.SeekGE([]byte(fields[1]))
+				case "seek-lt":
+					span, err = i.SeekLT([]byte(fields[1]))
+				default:
+					d.Fatalf(t, "unknown iter op %q", fields[0])
+				}
+				require.NoError(t, err)
+				fmt.Fprintf(&buf, "%8s: %v\n", fields[0], span)
+			}
+
+		default:
+			d.Fatalf(t, "unknown command %s", d.Cmd)
+		}
+		return buf.String()
+	})
+}
diff --git a/sstable/rowblk/testdata/rowblk_fragment_iter b/sstable/rowblk/testdata/rowblk_fragment_iter
new file mode 100644
index 0000000000..cb12705f65
--- /dev/null
+++ b/sstable/rowblk/testdata/rowblk_fragment_iter
@@ -0,0 +1,83 @@
+build
+a-d:{(#11,RANGEDEL)}
+b-e:{(#12,RANGEDEL) (#11,RANGEDEL)}
+----
+a-b:{(#11,RANGEDEL)}
+b-d:{(#12,RANGEDEL) (#11,RANGEDEL) (#11,RANGEDEL)}
+d-e:{(#12,RANGEDEL) (#11,RANGEDEL)}
+
+iter
+first
+next
+next
+next
+prev
+prev
+prev
+last
+next
+prev
+prev
+prev
+prev
+----
+   first: a-b:{(#11,RANGEDEL)}
+    next: b-d:{(#12,RANGEDEL) (#11,RANGEDEL) (#11,RANGEDEL)}
+    next: d-e:{(#12,RANGEDEL) (#11,RANGEDEL)}
+    next: 
+    prev: d-e:{(#12,RANGEDEL) (#11,RANGEDEL)}
+    prev: b-d:{(#12,RANGEDEL) (#11,RANGEDEL) (#11,RANGEDEL)}
+    prev: a-b:{(#11,RANGEDEL)}
+    last: d-e:{(#12,RANGEDEL) (#11,RANGEDEL)}
+    next: 
+    prev: d-e:{(#12,RANGEDEL) (#11,RANGEDEL)}
+    prev: b-d:{(#12,RANGEDEL) (#11,RANGEDEL) (#11,RANGEDEL)}
+    prev: a-b:{(#11,RANGEDEL)}
+    prev: 
+
+iter
+seek-ge c
+seek-lt g
+----
+ seek-ge: b-d:{(#12,RANGEDEL) (#11,RANGEDEL) (#11,RANGEDEL)}
+ seek-lt: d-e:{(#12,RANGEDEL) (#11,RANGEDEL)}
+
+# Check that seeking outside all ranges positions the iterator properly.
+iter
+seek-ge z
+prev
+seek-lt a
+next
+----
+ seek-ge: 
+    prev: d-e:{(#12,RANGEDEL) (#11,RANGEDEL)}
+ seek-lt: 
+    next: a-b:{(#11,RANGEDEL)}
+
+
+iter elide-same-seq-num
+first
+next
+next
+----
+   first: a-b:{(#11,RANGEDEL)}
+    next: b-d:{(#12,RANGEDEL) (#11,RANGEDEL)}
+    next: d-e:{(#12,RANGEDEL) (#11,RANGEDEL)}
+
+iter synthetic-seq-num=10
+first
+next
+next
+----
+   first: a-b:{(#10,RANGEDEL)}
+    next: b-d:{(#10,RANGEDEL) (#10,RANGEDEL) (#10,RANGEDEL)}
+    next: d-e:{(#10,RANGEDEL) (#10,RANGEDEL)}
+
+iter synthetic-seq-num=10 elide-same-seq-num
+first
+next
+next
+----
+   first: a-b:{(#10,RANGEDEL)}
+    next: b-d:{(#10,RANGEDEL)}
+    next: d-e:{(#10,RANGEDEL)}