From 55fcc6e95f602b58edc110b851714962507bcc2c Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Wed, 21 Aug 2024 17:18:21 -0700 Subject: [PATCH] colblk: special case zero bitmaps Some bitmaps are going to be all-zeros in common cases, for example the `isValueExternal` bitmap. It is worth special-casing these to speed up the decoding. We also improve `Bitmap.At` a bit using a shift and a mask to get the offset to the respective word. --- sstable/colblk/bitmap.go | 90 +++++++-- sstable/colblk/bitmap_test.go | 18 +- sstable/colblk/block_test.go | 8 +- sstable/colblk/data_block.go | 14 +- sstable/colblk/testdata/bitmap | 177 ++++++++++++------ sstable/colblk/testdata/block_writer | 3 +- .../colblk/testdata/data_block/bundle_search | 11 +- .../colblk/testdata/data_block/external_value | 7 +- .../colblk/testdata/data_block/next_prefix | 11 +- sstable/colblk/testdata/data_block/simple | 24 ++- 10 files changed, 249 insertions(+), 114 deletions(-) diff --git a/sstable/colblk/bitmap.go b/sstable/colblk/bitmap.go index a89d256d6d..3b5aacd68c 100644 --- a/sstable/colblk/bitmap.go +++ b/sstable/colblk/bitmap.go @@ -26,6 +26,8 @@ import ( // non-zero. The summary bitmap accelerates predecessor and successor // operations. type Bitmap struct { + // data contains the bitmap data, according to defaultBitmapEncoding, or it + // is nil if the bitmap is all zeros. data UnsafeRawSlice[uint64] bitCount int } @@ -38,8 +40,13 @@ var _ Array[bool] = Bitmap{} // performed, so the caller must guarantee the bitmap is appropriately sized and // the provided bitCount correctly identifies the number of bits in the bitmap. func DecodeBitmap(b []byte, off uint32, bitCount int) (bitmap Bitmap, endOffset uint32) { - sz := bitmapRequiredSize(bitCount) + encoding := bitmapEncoding(b[off]) + off++ + if encoding == zeroBitmapEncoding { + return Bitmap{bitCount: bitCount}, off + } off = align(off, align64) + sz := bitmapRequiredSize(bitCount) if len(b) < int(off)+sz { panic(errors.AssertionFailedf("bitmap of %d bits requires at least %d bytes; provided with %d-byte slice", bitCount, bitmapRequiredSize(bitCount), len(b[off:]))) @@ -55,13 +62,25 @@ var _ DecodeFunc[Bitmap] = DecodeBitmap // At returns true if the bit at position i is set and false otherwise. func (b Bitmap) At(i int) bool { - return (b.data.At(i>>6 /* i/64 */) & (1 << uint(i%64))) != 0 + if b.data.ptr == nil { + // zero bitmap case. + return false + } + // Inline b.data.At(i/64). + // The offset of the correct word is i / 64 * 8 = (i >> 3) &^ 0b111 + const mask = ^uintptr(0b111) + val := *(*uint64)(unsafe.Pointer(uintptr(b.data.ptr) + (uintptr(i)>>3)&mask)) + return val&(1<<(uint(i)&63)) != 0 } // Successor returns the next bit greater than or equal to i set in the bitmap. // The i parameter must be in [0, bitCount). Returns the number of bits // represented by the bitmap if no next bit is set. func (b Bitmap) Successor(i int) int { + if b.data.ptr == nil { + // Zero bitmap case. + return b.bitCount + } // nextInWord returns the index of the smallest set bit with an index >= bit // within the provided word. The returned index is an index local to the // word. @@ -127,6 +146,10 @@ func (b Bitmap) Successor(i int) int { // bitmap. The i parameter must be in [0, bitCount). Returns -1 if no previous // bit is set. func (b Bitmap) Predecessor(i int) int { + if b.data.ptr == nil { + // Zero bitmap case. + return -1 + } // prevInWord returns the index of the largest set bit ≤ bit within the // provided word. The returned index is an index local to the word. Returns // -1 if no set bit is found. @@ -204,33 +227,44 @@ func (b Bitmap) String() string { return sb.String() } -// BitmapBuilder constructs a Bitmap. Bits are default false. +// BitmapBuilder constructs a Bitmap. Bits default to false. type BitmapBuilder struct { words []uint64 } +type bitmapEncoding uint8 + +const ( + // defaultBitmapEncoding encodes the bitmap using ⌈n/64⌉ words followed by + // ⌈⌈n/64⌉/64⌉ summary words. + defaultBitmapEncoding bitmapEncoding = iota + // zeroBitmapEncoding is used for the special case when the bitmap is empty. + zeroBitmapEncoding +) + // Assert that BitmapBuilder implements ColumnWriter. var _ ColumnWriter = (*BitmapBuilder)(nil) +// bitmapRequiredSize returns the size of an encoded bitmap in bytes, using the +// defaultBitmapEncoding. func bitmapRequiredSize(total int) int { nWords := (total + 63) >> 6 // divide by 64 nSummaryWords := (nWords + 63) >> 6 // divide by 64 return (nWords + nSummaryWords) << 3 // multiply by 8 } -// Set sets the bit at position i if v is true and clears the bit at position i -// otherwise. Callers need not call Set if v is false and Set(i, true) has not -// been called yet. -func (b *BitmapBuilder) Set(i int, v bool) { +// Set sets the bit at position i to true. +func (b *BitmapBuilder) Set(i int) { w := i >> 6 // divide by 64 for len(b.words) <= w { b.words = append(b.words, 0) } - if v { - b.words[w] |= 1 << uint(i%64) - } else { - b.words[w] &^= 1 << uint(i%64) - } + b.words[w] |= 1 << uint(i%64) +} + +// isZero returns true if no bits are set and Invert was not called. +func (b *BitmapBuilder) isZero() bool { + return len(b.words) == 0 } // Reset resets the bitmap to the empty state. @@ -247,6 +281,21 @@ func (b *BitmapBuilder) DataType(int) DataType { return DataTypeBool } // Size implements the ColumnWriter interface. func (b *BitmapBuilder) Size(rows int, offset uint32) uint32 { + // First byte will be the encoding type. + offset++ + if b.isZero() { + return offset + } + offset = align(offset, align64) + return offset + uint32(bitmapRequiredSize(rows)) +} + +// InvertedSize returns the size of the encoded bitmap, assuming Invert will be called. +func (b *BitmapBuilder) InvertedSize(rows int, offset uint32) uint32 { + // First byte will be the encoding type. + offset++ + // An inverted bitmap will never use all-zeros encoding (even if it happens to + // be all zero). offset = align(offset, align64) return offset + uint32(bitmapRequiredSize(rows)) } @@ -254,6 +303,9 @@ func (b *BitmapBuilder) Size(rows int, offset uint32) uint32 { // Invert inverts the bitmap, setting all bits that are not set and clearing all // bits that are set. If the bitmap's tail is sparse and is not large enough to // represent nRows rows, it's first materialized. +// +// Note that Invert can affect the Size of the bitmap. Use InvertedSize() if you +// intend to invert the bitmap before finishing. func (b *BitmapBuilder) Invert(nRows int) { // If the tail of b is sparse, fill in zeroes before inverting. nBitmapWords := (nRows + 63) >> 6 @@ -266,6 +318,12 @@ func (b *BitmapBuilder) Invert(nRows int) { // Finish finalizes the bitmap, computing the per-word summary bitmap and // writing the resulting data to buf at offset. func (b *BitmapBuilder) Finish(col, nRows int, offset uint32, buf []byte) uint32 { + if b.isZero() { + buf[offset] = byte(zeroBitmapEncoding) + return offset + 1 + } + buf[offset] = byte(defaultBitmapEncoding) + offset++ offset = alignWithZeroes(buf, offset, align64) dest := makeUnsafeRawSlice[uint64](unsafe.Pointer(&buf[offset])) @@ -318,6 +376,14 @@ func (b *BitmapBuilder) WriteDebug(w io.Writer, rows int) { } func bitmapToBinFormatter(f *binfmt.Formatter, rows int) { + encoding := bitmapEncoding(f.PeekUint(1)) + f.HexBytesln(1, "bitmap encoding") + if encoding == zeroBitmapEncoding { + return + } + if encoding != defaultBitmapEncoding { + panic(fmt.Sprintf("unknown bitmap encoding %d", encoding)) + } if aligned := align(f.Offset(), 8); aligned-f.Offset() != 0 { f.HexBytesln(aligned-f.Offset(), "padding to align to 64-bit boundary") } diff --git a/sstable/colblk/bitmap_test.go b/sstable/colblk/bitmap_test.go index 58c5e320af..e544487fe0 100644 --- a/sstable/colblk/bitmap_test.go +++ b/sstable/colblk/bitmap_test.go @@ -32,18 +32,26 @@ func TestBitmapFixed(t *testing.T) { continue } if r == '1' { - builder.Set(n, r == '1') + builder.Set(n) } n++ } td.MaybeScanArgs(t, "rows", &n) - data := make([]byte, builder.Size(n, 0)) + size := builder.Size(n, 0) if td.HasArg("invert") { + size = builder.InvertedSize(n, 0) builder.Invert(n) + if newSize := builder.Size(n, 0); size != newSize { + td.Fatalf(t, "InvertedSize=%d, after invert Size=%d", size, newSize) + } } + data := make([]byte, builder.Size(n, size)) - _ = builder.Finish(0, n, 0, data) + endOffset := builder.Finish(0, n, 0, data) + if endOffset != size { + td.Fatalf(t, "endOffset=%d size=%d", endOffset, size) + } bitmap, _ = DecodeBitmap(data, 0, n) dumpBitmap(&buf, bitmap) fmt.Fprint(&buf, "\nBinary representation:\n") @@ -96,7 +104,7 @@ func TestBitmapRandom(t *testing.T) { for i := 0; i < size; i++ { v[i] = rng.Float64() < p if v[i] { - builder.Set(i, v[i]) + builder.Set(i) } } data := make([]byte, builder.Size(size, 0)) @@ -163,7 +171,7 @@ func BenchmarkBitmapBuilder(b *testing.B) { var builder BitmapBuilder for i := 0; i < size; i++ { if v[i] { - builder.Set(i, v[i]) + builder.Set(i) } } _ = builder.Finish(0, size, 0, data) diff --git a/sstable/colblk/block_test.go b/sstable/colblk/block_test.go index 3f077d3eed..a49a8dd262 100644 --- a/sstable/colblk/block_test.go +++ b/sstable/colblk/block_test.go @@ -130,7 +130,9 @@ func TestBlockWriter(t *testing.T) { for r := range lineFields { v, err := strconv.ParseBool(lineFields[r][c]) panicIfErr(dataType, lineFields[r][c], err) - bb.Set(r, v) + if v { + bb.Set(r) + } } case DataTypeUint: b := colWriters[c].(*UintBuilder) @@ -221,7 +223,9 @@ func buildBlock(schema []testColumnSpec, rows int, data []interface{}) []byte { var bb BitmapBuilder bb.Reset() for row, v := range data[col].([]bool) { - bb.Set(row, v) + if v { + bb.Set(row) + } } cw[col] = &bb case DataTypeUint: diff --git a/sstable/colblk/data_block.go b/sstable/colblk/data_block.go index 967e639a99..33e1fcf383 100644 --- a/sstable/colblk/data_block.go +++ b/sstable/colblk/data_block.go @@ -446,11 +446,11 @@ func (w *DataBlockWriter) Add( ) { w.KeyWriter.WriteKey(w.rows, ikey.UserKey, kcmp.PrefixLen, kcmp.CommonPrefixLen) if kcmp.PrefixEqual() { - w.prefixSame.Set(w.rows, true) + w.prefixSame.Set(w.rows) } w.trailers.Set(w.rows, uint64(ikey.Trailer)) if valuePrefix.IsValueHandle() { - w.isValueExternal.Set(w.rows, true) + w.isValueExternal.Set(w.rows) // Write the value with the value prefix byte preceding the value. w.valuePrefixTmp[0] = byte(valuePrefix) w.values.PutConcat(w.valuePrefixTmp[:], value) @@ -475,7 +475,7 @@ func (w *DataBlockWriter) Size() int { off := blockHeaderSize(len(w.Schema.ColumnTypes)+dataBlockColumnMax, dataBlockCustomHeaderSize) off = w.KeyWriter.Size(w.rows, off) off = w.trailers.Size(w.rows, off) - off = w.prefixSame.Size(w.rows, off) + off = w.prefixSame.InvertedSize(w.rows, off) off = w.values.Size(w.rows, off) off = w.isValueExternal.Size(w.rows, off) off++ // trailer padding byte @@ -490,6 +490,11 @@ func (w *DataBlockWriter) Finish() []byte { Columns: uint16(cols), Rows: uint32(w.rows), } + + // Invert the prefix-same bitmap before writing it out, because we want it + // to represent when the prefix changes. + w.prefixSame.Invert(w.rows) + w.enc.init(w.Size(), h, dataBlockCustomHeaderSize) // Write the max key length in the custom header. @@ -501,9 +506,6 @@ func (w *DataBlockWriter) Finish() []byte { // Write the internal key trailers. w.enc.encode(w.rows, &w.trailers) - // Invert the prefix-same bitmap before writing it out, because we want it - // to represent when the prefix changes. - w.prefixSame.Invert(w.rows) w.enc.encode(w.rows, &w.prefixSame) // Write the value columns. diff --git a/sstable/colblk/testdata/bitmap b/sstable/colblk/testdata/bitmap index 36ccb5fa32..74239c79d4 100644 --- a/sstable/colblk/testdata/bitmap +++ b/sstable/colblk/testdata/bitmap @@ -1,10 +1,48 @@ +build +00000 +---- +00000 +Binary representation: +0-1: x 01 # bitmap encoding + +build invert +00000 +---- +11111 +Binary representation: +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0001111100000000000000000000000000000000000000000000000000000000 # bitmap word 0 +16-24: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 + +# An inverted bitmap never uses the zero encoding. +build invert +11111 +---- +00000 +Binary representation: +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 +16-24: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 + +build rows=100 +0 +---- +0000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000 +Binary representation: +0-1: x 01 # bitmap encoding + build 10101011100011100 ---- 10101011100011100 Binary representation: -00-08: b 1101010101110001000000000000000000000000000000000000000000000000 # bitmap word 0 -08-16: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 1101010101110001000000000000000000000000000000000000000000000000 # bitmap word 0 +16-24: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 successor indexes=(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16) ---- @@ -53,8 +91,10 @@ build invert ---- 01010100011100011 Binary representation: -00-08: b 0010101010001110000000010000000000000000000000000000000000000000 # bitmap word 0 -08-16: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0010101010001110000000010000000000000000000000000000000000000000 # bitmap word 0 +16-24: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 build @@ -62,8 +102,10 @@ build ---- 1 Binary representation: -00-08: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap word 0 -08-16: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap word 0 +16-24: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # Test sparseness; relying on the tailing bits being implicitly zeroed. @@ -79,23 +121,27 @@ build rows=512 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 Binary representation: -00-08: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap word 0 -08-16: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 1 -16-24: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 2 -24-32: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 3 -32-40: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 4 -40-48: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 5 -48-56: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 6 -56-64: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 7 -64-72: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +000-001: x 00 # bitmap encoding +001-008: x 00000000000000 # padding to align to 64-bit boundary +008-016: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap word 0 +016-024: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 1 +024-032: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 2 +032-040: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 3 +040-048: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 4 +048-056: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 5 +056-064: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 6 +064-072: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 7 +072-080: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 build invert 1 ---- 0 Binary representation: -00-08: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 -08-16: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 +16-24: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # Test sparseness with inversion, relying on the trailing bits being implicitly # set to one. @@ -112,15 +158,17 @@ build invert rows=512 1111111111111111111111111111111111111111111111111111111111111111 1111111111111111111111111111111111111111111111111111111111111111 Binary representation: -00-08: b 1111111011111111111111111111111111111111111111111111111111111111 # bitmap word 0 -08-16: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 1 -16-24: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 2 -24-32: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 3 -32-40: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 4 -40-48: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 5 -48-56: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 6 -56-64: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 7 -64-72: b 1111111100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +000-001: x 00 # bitmap encoding +001-008: x 00000000000000 # padding to align to 64-bit boundary +008-016: b 1111111011111111111111111111111111111111111111111111111111111111 # bitmap word 0 +016-024: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 1 +024-032: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 2 +032-040: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 3 +040-048: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 4 +048-056: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 5 +056-064: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 6 +064-072: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 7 +072-080: b 1111111100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # 32-bits wide @@ -129,8 +177,10 @@ build ---- 1010101010111111111110000001110 Binary representation: -00-08: b 0101010111111101000111110011100000000000000000000000000000000000 # bitmap word 0 -08-16: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0101010111111101000111110011100000000000000000000000000000000000 # bitmap word 0 +16-24: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # 33-bits wide @@ -139,8 +189,10 @@ build ---- 10101010101111111111100000011101 Binary representation: -00-08: b 0101010111111101000111111011100000000000000000000000000000000000 # bitmap word 0 -08-16: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0101010111111101000111111011100000000000000000000000000000000000 # bitmap word 0 +16-24: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # 64-bits wide @@ -149,8 +201,10 @@ build ---- 1010101010111111111110000001110101010101011111111111000000111011 Binary representation: -00-08: b 0101010111111101000111111011100010101010111111100000111111011100 # bitmap word 0 -08-16: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0101010111111101000111111011100010101010111111100000111111011100 # bitmap word 0 +16-24: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # 63-bits wide @@ -159,8 +213,10 @@ build ---- 101010101011111111111000000111010101010101111111111100000011101 Binary representation: -00-08: b 0101010111111101000111111011100010101010111111100000111101011100 # bitmap word 0 -08-16: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0101010111111101000111111011100010101010111111100000111101011100 # bitmap word 0 +16-24: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # 65-bits wide @@ -171,9 +227,11 @@ build 1010101010111111111110000001110101010101011111111111000000111011 1 Binary representation: -00-08: b 0101010111111101000111111011100010101010111111100000111111011100 # bitmap word 0 -08-16: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap word 1 -16-24: b 0000001100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0101010111111101000111111011100010101010111111100000111111011100 # bitmap word 0 +16-24: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap word 1 +24-32: b 0000001100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 build 1111111111111111111111111111111111111111111111111111111111111111 @@ -188,12 +246,14 @@ build 1111111111111111111111111111111111111111111111111111111111111111 1111111111111111111111111111111111111111111111111111111111111111 Binary representation: -00-08: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 0 -08-16: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 1 -16-24: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 2 -24-32: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 3 -32-40: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 4 -40-48: b 0001111100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +000-001: x 00 # bitmap encoding +001-008: x 00000000000000 # padding to align to 64-bit boundary +008-016: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 0 +016-024: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 1 +024-032: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 2 +032-040: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 3 +040-048: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 4 +048-056: b 0001111100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 build 0000000000000000000000000000000000000000000000000000000000000000 @@ -208,12 +268,7 @@ build 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 Binary representation: -00-08: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 -08-16: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 1 -16-24: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 2 -24-32: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 3 -32-40: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 4 -40-48: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +0-1: x 01 # bitmap encoding build 0000000000000000000000000000000000000000000000000000000000000000 @@ -228,12 +283,14 @@ build 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000001 Binary representation: -00-08: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 -08-16: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 1 -16-24: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 2 -24-32: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 3 -32-40: b 0000000000000000000000000000000000000000000000000000000010000000 # bitmap word 4 -40-48: b 0001000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +000-001: x 00 # bitmap encoding +001-008: x 00000000000000 # padding to align to 64-bit boundary +008-016: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 +016-024: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 1 +024-032: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 2 +032-040: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 3 +040-048: b 0000000000000000000000000000000000000000000000000000000010000000 # bitmap word 4 +048-056: b 0001000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # Write out fewer rows than we set. The bitmap summary should reflect the # truncated view of the bitmap. @@ -248,7 +305,9 @@ build rows=192 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 Binary representation: -00-08: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 -08-16: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 1 -16-24: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 2 -24-32: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +00-01: x 00 # bitmap encoding +01-08: x 00000000000000 # padding to align to 64-bit boundary +08-16: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 +16-24: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 1 +24-32: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 2 +32-40: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 diff --git a/sstable/colblk/testdata/block_writer b/sstable/colblk/testdata/block_writer index f86c5e7933..b7b3a7a915 100644 --- a/sstable/colblk/testdata/block_writer +++ b/sstable/colblk/testdata/block_writer @@ -273,7 +273,8 @@ finish 07-08: b 00000001 # bool 08-12: x 0c000000 # page start 12 # data for column 0 -12-16: x 00000000 # padding to align to 64-bit boundary +12-13: x 00 # bitmap encoding +13-16: x 000000 # padding to align to 64-bit boundary 16-24: b 1011011101111101110110110000000000000000000000000000000000000000 # bitmap word 0 24-32: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 32-33: x 00 # block trailer padding diff --git a/sstable/colblk/testdata/data_block/bundle_search b/sstable/colblk/testdata/data_block/bundle_search index c23b7b6a27..6a10e82fd5 100644 --- a/sstable/colblk/testdata/data_block/bundle_search +++ b/sstable/colblk/testdata/data_block/bundle_search @@ -10,7 +10,7 @@ init bundle-size=4 ---- -size=49: +size=50: 0: prefixes: prefixbytes(4): 0 keys 1: suffixes: bytes: 0 rows set; 0 bytes in data 2: trailers: uint: 0 rows @@ -86,7 +86,7 @@ backwash#1,SET:v backwoods#1,SET:v bacteria#1,SET:v ---- -size=745: +size=720: 0: prefixes: prefixbytes(4): 66 keys 1: suffixes: bytes: 66 rows set; 0 bytes in data 2: trailers: uint: 66 rows @@ -371,7 +371,7 @@ finish 550-551: x 80 # encoding: const 551-559: x 0101000000000000 # 64-bit constant: 257 # data for column 3 -559-560: x 00 # padding to align to 64-bit boundary +559-560: x 00 # bitmap encoding 560-568: b 1111111111111111111111111111111111111111111111111111111111111111 # bitmap word 0 568-576: b 0000001100000000000000000000000000000000000000000000000000000000 # bitmap word 1 576-584: b 0000001100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 @@ -514,10 +514,7 @@ finish 716-717: x 76 # data[64]: v 717-718: x 76 # data[65]: v # data for column 5 -718-720: x 0000 # padding to align to 64-bit boundary -720-728: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 -728-736: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 1 -736-744: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +718-719: x 01 # bitmap encoding iter seek-ge backache diff --git a/sstable/colblk/testdata/data_block/external_value b/sstable/colblk/testdata/data_block/external_value index 301f56c688..9b9a36f783 100644 --- a/sstable/colblk/testdata/data_block/external_value +++ b/sstable/colblk/testdata/data_block/external_value @@ -1,6 +1,6 @@ init ---- -size=49: +size=50: 0: prefixes: prefixbytes(16): 0 keys 1: suffixes: bytes: 0 rows set; 0 bytes in data 2: trailers: uint: 0 rows @@ -187,7 +187,8 @@ finish 224-226: x 0100 # data[18] = 1 226-228: x 0000 # data[19] = 0 # data for column 3 -228-232: x 00000000 # padding to align to 64-bit boundary +228-229: x 00 # bitmap encoding +229-232: x 000000 # padding to align to 64-bit boundary 232-240: b 0001000100000100000010110000000000000000000000000000000000000000 # bitmap word 0 240-248: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # data for column 4 @@ -260,7 +261,7 @@ finish 614-623: x 6c652d6b6977693938 # (continued...) 623-623: x # data[19]: # data for column 5 -623-624: x 00 # padding to align to 64-bit boundary +623-624: x 00 # bitmap encoding 624-632: b 1100111011111011000001110000000000000000000000000000000000000000 # bitmap word 0 632-640: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 diff --git a/sstable/colblk/testdata/data_block/next_prefix b/sstable/colblk/testdata/data_block/next_prefix index f56cd1f200..b1efbcb098 100644 --- a/sstable/colblk/testdata/data_block/next_prefix +++ b/sstable/colblk/testdata/data_block/next_prefix @@ -1,6 +1,6 @@ init ---- -size=49: +size=50: 0: prefixes: prefixbytes(16): 0 keys 1: suffixes: bytes: 0 rows set; 0 bytes in data 2: trailers: uint: 0 rows @@ -30,7 +30,7 @@ blockprefix_kiwi@99#0,SET:kiwi99 blockprefix_kiwi@98#0,SET:kiwi98 blockprefix_lemon@92#0,DEL: ---- -size=425: +size=408: 0: prefixes: prefixbytes(16): 20 keys 1: suffixes: bytes: 20 rows set; 54 bytes in data 2: trailers: uint: 20 rows @@ -187,7 +187,8 @@ finish 224-226: x 0100 # data[18] = 1 226-228: x 0000 # data[19] = 0 # data for column 3 -228-232: x 00000000 # padding to align to 64-bit boundary +228-229: x 00 # bitmap encoding +229-232: x 000000 # padding to align to 64-bit boundary 232-240: b 0001000100000100000010110000000000000000000000000000000000000000 # bitmap word 0 240-248: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # data for column 4 @@ -237,9 +238,7 @@ finish 400-406: x 6b6977693938 # data[18]: kiwi98 406-406: x # data[19]: # data for column 5 -406-408: x 0000 # padding to align to 64-bit boundary -408-416: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 -416-424: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +406-407: x 01 # bitmap encoding # Scan across the block using next prefix. diff --git a/sstable/colblk/testdata/data_block/simple b/sstable/colblk/testdata/data_block/simple index b1f304a2ff..c3594142df 100644 --- a/sstable/colblk/testdata/data_block/simple +++ b/sstable/colblk/testdata/data_block/simple @@ -1,6 +1,6 @@ init ---- -size=49: +size=50: 0: prefixes: prefixbytes(16): 0 keys 1: suffixes: bytes: 0 rows set; 0 bytes in data 2: trailers: uint: 0 rows @@ -16,7 +16,7 @@ c@9#0,SETWITHDEL:coconut c@6#0,SET:cantelope c@1#0,SET:clementine ---- -size=177: +size=160: 0: prefixes: prefixbytes(16): 6 keys 1: suffixes: bytes: 6 rows set; 13 bytes in data 2: trailers: uint: 6 rows @@ -27,7 +27,7 @@ size=177: write d@11#0,DEL: ---- -size=185: +size=169: 0: prefixes: prefixbytes(16): 7 keys 1: suffixes: bytes: 7 rows set; 16 bytes in data 2: trailers: uint: 7 rows @@ -115,7 +115,8 @@ finish 087-088: x 01 # data[5] = 1 088-089: x 00 # data[6] = 0 # data for column 3 -089-096: x 00000000000000 # padding to align to 64-bit boundary +089-090: x 00 # bitmap encoding +090-096: x 000000000000 # padding to align to 64-bit boundary 096-104: b 0100101100000000000000000000000000000000000000000000000000000000 # bitmap word 0 104-112: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # data for column 4 @@ -139,9 +140,7 @@ finish 157-167: x 636c656d656e74696e65 # data[5]: clementine 167-167: x # data[6]: # data for column 5 -167-168: x 00 # padding to align to 64-bit boundary -168-176: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 -176-184: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +167-168: x 01 # bitmap encoding iter first @@ -299,7 +298,7 @@ d@11: init ---- -size=49: +size=50: 0: prefixes: prefixbytes(16): 0 keys 1: suffixes: bytes: 0 rows set; 0 bytes in data 2: trailers: uint: 0 rows @@ -326,7 +325,7 @@ aaaaaaaaaaaaaaapproves@10#0,SET:a aaaaaaaaaaaaaaarresting@10#0,SET:a aaaaaaaaaaaaaaarrived@10#0,SET:a ---- -size=353: +size=334: 0: prefixes: prefixbytes(16): 17 keys 1: suffixes: bytes: 17 rows set; 51 bytes in data 2: trailers: uint: 17 rows @@ -451,7 +450,8 @@ finish 265-266: x 80 # encoding: const 266-274: x 0100000000000000 # 64-bit constant: 1 # data for column 3 -274-280: x 000000000000 # padding to align to 64-bit boundary +274-275: x 00 # bitmap encoding +275-280: x 0000000000 # padding to align to 64-bit boundary 280-288: b 1111111111111111000000010000000000000000000000000000000000000000 # bitmap word 0 288-296: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 # data for column 4 @@ -495,9 +495,7 @@ finish 330-331: x 61 # data[15]: a 331-332: x 61 # data[16]: a # data for column 5 -332-336: x 00000000 # padding to align to 64-bit boundary -336-344: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap word 0 -344-352: b 0000000000000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63 +332-333: x 01 # bitmap encoding iter seek-ge aaa