Skip to content

Commit

Permalink
colblk: add DataBlockDecoder.Validate
Browse files Browse the repository at this point in the history
Add a Validate method to DataBlockDecoder that validates invariants of the
decoded data block.

Informs #4103.
  • Loading branch information
jbowens committed Oct 25, 2024
1 parent f498510 commit 62cdcb4
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 6 deletions.
47 changes: 47 additions & 0 deletions sstable/colblk/data_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,53 @@ func (d *DataBlockDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
f.ToTreePrinter(n)
}

// Validate checks invariants that should hold across all data blocks. It
// materializes each row's internal key in order and verifies, for every row
// after the first, that:
//
//   - keys are strictly ordered: user keys are non-decreasing according to
//     comparer.Compare and, among equal user keys, trailers are strictly
//     decreasing;
//   - a key whose user key equals its predecessor's is marked obsolete, unless
//     the predecessor is a MERGE;
//   - the prefix-changed bit matches a byte-wise comparison of the
//     comparer.Split prefixes of the row and its predecessor.
//
// Every check relates a row to the row before it, so row 0 is exempt: it has
// no predecessor, and comparing it against an empty placeholder key would
// spuriously reject a block whose first user key compares equal to the empty
// key.
//
// Validate returns an assertion-failure error describing the first violation
// found, or nil if the block passes all checks.
func (d *DataBlockDecoder) Validate(comparer *base.Comparer, keySchema *KeySchema) error {
	// TODO(jackson): Consider avoiding these allocations, even if this is only
	// called in invariants builds.
	n := int(d.d.header.Rows)
	meta := &KeySeekerMetadata{}
	keySchema.InitKeySeekerMetadata(meta, d)
	keySeeker := keySchema.KeySeeker(meta)
	// prevKey holds a copy of the previous row's key; it is only read once
	// i > 0, by which point it has been populated by CopyFrom below.
	prevKey := base.InternalKey{UserKey: make([]byte, 0, d.maximumKeyLength+1)}
	var curKey PrefixBytesIter
	curKey.Init(int(d.maximumKeyLength), nil)

	for i := 0; i < n; i++ {
		// NOTE(review): i-1 is presumably the row previously materialized
		// into curKey (-1 on the first iteration) — confirm against the
		// KeySeeker.MaterializeUserKey contract.
		k := base.InternalKey{
			UserKey: keySeeker.MaterializeUserKey(&curKey, i-1, i),
			Trailer: base.InternalKeyTrailer(d.trailers.At(i)),
		}
		if i > 0 {
			// Ensure the keys are ordered.
			ucmp := comparer.Compare(k.UserKey, prevKey.UserKey)
			if ucmp < 0 || (ucmp == 0 && k.Trailer >= prevKey.Trailer) {
				return errors.AssertionFailedf("key %s (row %d) and key %s (row %d) are out of order",
					prevKey, i-1, k, i)
			}
			// Ensure the obsolete bit is set if the key is definitively
			// obsolete. Not all sources of obsolescence are evident with only
			// a data block available (range deletions or point keys in
			// previous blocks may cause a key to be obsolete).
			if ucmp == 0 && prevKey.Kind() != base.InternalKeyKindMerge && !d.isObsolete.At(i) {
				return errors.AssertionFailedf("key %s (row %d) is shadowed by previous key %s but is not marked as obsolete",
					k, i, prevKey)
			}
			// Ensure that the prefix-changed bit is set correctly.
			currPrefix := comparer.Split.Prefix(k.UserKey)
			prevPrefix := comparer.Split.Prefix(prevKey.UserKey)
			prefixChanged := !bytes.Equal(prevPrefix, currPrefix)
			if stored := d.prefixChanged.At(i); prefixChanged != stored {
				return errors.AssertionFailedf("prefix changed bit for key %q (row %d) is %t, expected %t [prev key was %q]",
					k.UserKey, i, stored, prefixChanged, prevKey.UserKey)
			}
		}

		// Copy rather than alias: k.UserKey is backed by curKey's buffer,
		// which the next MaterializeUserKey call reuses.
		prevKey.CopyFrom(k)
	}
	return nil
}

// Compile-time assertion that *DataBlockIter implements
// block.DataBlockIterator: assigning a nil *DataBlockIter to a blank variable
// of the interface type fails to build if the interface is not satisfied.
var _ block.DataBlockIterator = (*DataBlockIter)(nil)

Expand Down
8 changes: 8 additions & 0 deletions sstable/colblk/data_block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func TestDataBlock(t *testing.T) {
if td.Cmd == "write-block" {
w.Init(&testKeysSchema)
}
var prevKey base.InternalKey
for _, line := range strings.Split(td.Input, "\n") {
line, isObsolete := strings.CutSuffix(line, "obsolete")

Expand All @@ -68,8 +69,12 @@ func TestDataBlock(t *testing.T) {
if strings.HasPrefix(valueString, "valueHandle") {
vp = block.ValueHandlePrefix(kcmp.PrefixEqual(), 0)
}
if kcmp.UserKeyComparison == 0 && prevKey.Kind() != base.InternalKeyKindMerge {
isObsolete = true
}
v := []byte(line[j+1:])
w.Add(ik, v, vp, kcmp, isObsolete)
prevKey = ik
sizes = append(sizes, w.Size())
}
if td.Cmd == "write-block" {
Expand Down Expand Up @@ -105,6 +110,9 @@ func TestDataBlock(t *testing.T) {
tp := treeprinter.New()
r.Describe(f, tp)
fmt.Fprintf(&buf, "LastKey: %s\n%s", lastKey.Pretty(testkeys.Comparer.FormatKey), tp.String())
if err := r.Validate(testkeys.Comparer, &testKeysSchema); err != nil {
fmt.Fprintln(&buf, err)
}
return buf.String()
case "iter":
var seqNum uint64
Expand Down
9 changes: 6 additions & 3 deletions sstable/colblk/testdata/data_block/external_value
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ blockprefix_kiwi@99#0,SET:valueHandle-kiwi99
blockprefix_kiwi@98#0,SET:valueHandle-kiwi98
blockprefix_lemon@92#0,DEL:
----
size=650:
size=673:
0: prefixes: prefixbytes(16): 20 keys
1: suffixes: bytes: 20 rows set; 54 bytes in data
2: trailers: uint: 20 rows
Expand Down Expand Up @@ -260,8 +260,11 @@ data block header
│ ├── 632-640: b 1100111011111011000001110000000000000000000000000000000000000000 # bitmap word 0
│ └── 640-648: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63
├── data for column 6 (bool)
│ └── 648-649: x 01 # zero bitmap encoding
└── 649-650: x 00 # block padding byte
│ ├── 648-649: x 00 # default bitmap encoding
│ ├── 649-656: x 00000000000000 # padding to align to 64-bit boundary
│ ├── 656-664: b 0100000000000000000000000000000000000000000000000000000000000000 # bitmap word 0
│ └── 664-672: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63
└── 672-673: x 00 # block padding byte

# Scan across the block using next.
iter
Expand Down
8 changes: 5 additions & 3 deletions sstable/colblk/testdata/data_block/next_prefix
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ blockprefix_kiwi@99#0,SET:kiwi99
blockprefix_kiwi@98#0,SET:kiwi98
blockprefix_lemon@92#0,DEL:
----
size=417:
size=433:
0: prefixes: prefixbytes(16): 20 keys
1: suffixes: bytes: 20 rows set; 54 bytes in data
2: trailers: uint: 20 rows
Expand Down Expand Up @@ -235,8 +235,10 @@ data block header
├── data for column 5 (bool)
│ └── 414-415: x 01 # zero bitmap encoding
├── data for column 6 (bool)
│ └── 415-416: x 01 # zero bitmap encoding
└── 416-417: x 00 # block padding byte
│ ├── 415-416: x 00 # default bitmap encoding
│ ├── 416-424: b 0100000000000000000000000000000000000000000000000000000000000000 # bitmap word 0
│ └── 424-432: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63
└── 432-433: x 00 # block padding byte

# Scan across the block using next prefix.

Expand Down
8 changes: 8 additions & 0 deletions sstable/colblk_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,14 @@ func (w *RawColumnWriter) enqueueDataBlock(
// it's unnecessary.
w.meta.SetLargestPointKey(lastKey.Clone())

if invariants.Enabled {
var dec colblk.DataBlockDecoder
dec.Init(w.opts.KeySchema, serializedBlock)
if err := dec.Validate(w.comparer, w.opts.KeySchema); err != nil {
panic(err)
}
}

// Serialize the data block, compress it and send it to the write queue.
cb := compressedBlockPool.Get().(*compressedBlock)
cb.blockBuf.checksummer.Type = w.opts.Checksum
Expand Down

0 comments on commit 62cdcb4

Please sign in to comment.