Skip to content

Commit

Permalink
crdbtest: fix KeySchema handling of byte-wise prefixes
Browse files Browse the repository at this point in the history
This is a port of the Cockroach KeySchema bug fix in #134349.
  • Loading branch information
jbowens committed Nov 8, 2024
1 parent b2cb561 commit e69d448
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 156 deletions.
78 changes: 51 additions & 27 deletions internal/crdbtest/crdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ type cockroachKeyWriter struct {
logicalTimes colblk.UintBuilder
untypedVersions colblk.RawBytesBuilder
suffixTypes suffixTypes
prevRoachKeyLen int32
prevSuffix []byte
}

Expand All @@ -449,35 +450,57 @@ func (kw *cockroachKeyWriter) Reset() {
kw.logicalTimes.Reset()
kw.untypedVersions.Reset()
kw.suffixTypes = 0
kw.prevRoachKeyLen = 0
}

func (kw *cockroachKeyWriter) ComparePrev(key []byte) colblk.KeyComparison {
var cmpv colblk.KeyComparison
cmpv.PrefixLen = int32(Split(key)) // TODO(jackson): Inline
prefixLen := Split(key)
if kw.roachKeys.Rows() == 0 {
cmpv.UserKeyComparison = 1
return cmpv
}
lp := kw.roachKeys.UnsafeGet(kw.roachKeys.Rows() - 1)
cmpv.CommonPrefixLen = int32(crbytes.CommonPrefix(lp, key[:cmpv.PrefixLen-1]))
if cmpv.CommonPrefixLen == cmpv.PrefixLen-1 {
// Adjust CommonPrefixLen to include the sentinel byte.
cmpv.CommonPrefixLen = cmpv.PrefixLen
cmpv.UserKeyComparison = int32(ComparePointSuffixes(key[cmpv.PrefixLen:], kw.prevSuffix))
return cmpv
}
// The keys have different MVCC prefixes. We haven't determined which is
// greater, but we know the index at which they diverge. The base.Comparer
// contract dictates that prefixes must be lexicographically ordered.
if len(lp) == int(cmpv.CommonPrefixLen) {
// cmpv.PrefixLen > cmpv.PrefixLenShared; key is greater.
cmpv.UserKeyComparison = +1
} else {
// Both keys have at least 1 additional byte at which they diverge.
// Compare the diverging byte.
cmpv.UserKeyComparison = int32(cmp.Compare(key[cmpv.CommonPrefixLen], lp[cmpv.CommonPrefixLen]))
return colblk.KeyComparison{
PrefixLen: int32(prefixLen),
CommonPrefixLen: 0,
UserKeyComparison: +1,
}
}
lastRoachKey := kw.roachKeys.UnsafeGet(kw.roachKeys.Rows() - 1)
commonPrefixLen := crbytes.CommonPrefix(lastRoachKey, key[:prefixLen-1])
if len(lastRoachKey) == commonPrefixLen {
if invariants.Enabled && len(lastRoachKey) > prefixLen-1 {
panic(errors.AssertionFailedf("out-of-order keys: previous roach key %q > roach key of key %q",
lastRoachKey, key))
}
// All the bytes of the previous roach key form a byte-wise prefix of
// [key]'s prefix. The last byte of the previous prefix is the 0x00
// sentinel byte, which is not stored within roachKeys. It's possible
// that [key] also has a 0x00 byte in the same position (either also
// serving as a sentinel byte, in which case the prefixes are equal, or
// not in which case [key] is greater). In both cases, we need to
// increment CommonPrefixLen.
if key[commonPrefixLen] == 0x00 {
commonPrefixLen++
if commonPrefixLen == prefixLen {
// The prefixes are equal; compare the suffixes.
return colblk.KeyComparison{
PrefixLen: int32(prefixLen),
CommonPrefixLen: int32(commonPrefixLen),
UserKeyComparison: int32(ComparePointSuffixes(key[prefixLen:], kw.prevSuffix)),
}
}
}
// prefixLen > commonPrefixLen; key is greater.
return colblk.KeyComparison{
PrefixLen: int32(prefixLen),
CommonPrefixLen: int32(commonPrefixLen),
UserKeyComparison: +1,
}
}
// Both keys have at least 1 additional byte at which they diverge.
// Compare the diverging byte.
return colblk.KeyComparison{
PrefixLen: int32(prefixLen),
CommonPrefixLen: int32(commonPrefixLen),
UserKeyComparison: int32(cmp.Compare(key[commonPrefixLen], lastRoachKey[commonPrefixLen])),
}
return cmpv
}

func (kw *cockroachKeyWriter) WriteKey(
Expand All @@ -496,9 +519,10 @@ func (kw *cockroachKeyWriter) WriteKey(
// TODO(jackson): Avoid copying the previous suffix.
kw.prevSuffix = append(kw.prevSuffix[:0], key[keyPrefixLen:]...)

// When the roach key is the same, keyPrefixLenSharedWithPrev includes the
// separator byte.
kw.roachKeys.Put(key[:keyPrefixLen-1], min(int(keyPrefixLenSharedWithPrev), int(keyPrefixLen)-1))
// When the roach key is the same or contain the previous key as a prefix,
// keyPrefixLenSharedWithPrev includes the previous key's separator byte.
kw.roachKeys.Put(key[:keyPrefixLen-1], min(int(keyPrefixLenSharedWithPrev), int(kw.prevRoachKeyLen)))
kw.prevRoachKeyLen = keyPrefixLen - 1

// NB: The w.logicalTimes builder was initialized with InitWithDefault, so
// if we don't set a value, the column value is implicitly zero. We only
Expand Down
13 changes: 13 additions & 0 deletions internal/crdbtest/crdb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"math/rand/v2"
"slices"
"strconv"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -386,6 +387,9 @@ func formatUserKey(key []byte) string {
if key[len(key)-1] != byte(len(suffix)+1) {
panic("invalid suffix length byte")
}
if bytes.ContainsFunc(prefix, func(r rune) bool { return r < ' ' || r > '~' }) {
return fmt.Sprintf("%q @ %X", prefix, suffix)
}
return fmt.Sprintf("%s @ %X", prefix, suffix)
}

Expand Down Expand Up @@ -423,6 +427,15 @@ func formatKV(kv base.InternalKV) string {
// foo @ 0001020304050607
func parseUserKey(userKeyStr string) []byte {
roachKey, versionStr := splitStringAt(userKeyStr, " @ ")

if len(roachKey) >= 2 && roachKey[0] == '"' && roachKey[len(roachKey)-1] == '"' {
var err error
roachKey, err = strconv.Unquote(roachKey)
if err != nil {
panic(fmt.Sprintf("invalid user key string %s: %v", userKeyStr, err))
}
}

// Append sentinel byte.
userKey := append([]byte(roachKey), 0)
if versionStr != "" {
Expand Down
2 changes: 1 addition & 1 deletion internal/crdbtest/key_schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func runDataDrivenTest(t *testing.T, path string) {
e.Add(key, value, 0, kcmp, false /* isObsolete */)
buf = e.MaterializeLastUserKey(buf[:0])
if !Comparer.Equal(key.UserKey, buf) {
td.Fatalf(t, "incorect MaterializeLastKey: %s instead of %s", formatUserKey(buf), formatUserKey(key.UserKey))
td.Fatalf(t, "incorrect MaterializeLastKey: %s instead of %s", formatUserKey(buf), formatUserKey(key.UserKey))
}
}
numRows := e.Rows()
Expand Down
Loading

0 comments on commit e69d448

Please sign in to comment.