Skip to content

Commit

Permalink
sstable: flush value blocks if 128 are buffered
Browse files Browse the repository at this point in the history
  • Loading branch information
dt committed Dec 28, 2023
1 parent 1cce3d0 commit 593fc13
Show file tree
Hide file tree
Showing 6 changed files with 417 additions and 29 deletions.
13 changes: 13 additions & 0 deletions sstable/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,16 @@ type WriterOptions struct {
// RequiredInPlaceValueBound mirrors
// Options.Experimental.RequiredInPlaceValueBound.
RequiredInPlaceValueBound UserKeyPrefixBound

// ValueBlockBufferLimit is the number of value blocks to buffer in-memory
// before flushing them to the underlying writer. Buffering these blocks and
// flushing them in groups, rather than interleaved block-by-block with data
// blocks, potentially improves locality of scans over data blocks in the
// presence of prefetching/read-ahead, page caching, etc.
//
// A value of 0 implies the default of max(8MB/BlockSize, 16) while a value of
// less than 0 disables buffering entirely.
ValueBlockBufferLimit int
}

func (o WriterOptions) ensureDefaults() WriterOptions {
Expand All @@ -288,6 +298,9 @@ func (o WriterOptions) ensureDefaults() WriterOptions {
if o.IndexBlockSize <= 0 {
o.IndexBlockSize = o.BlockSize
}
if o.ValueBlockBufferLimit == 0 {
o.ValueBlockBufferLimit = max(16, 8<<20/o.BlockSize)
}
if o.MergerName == "" {
o.MergerName = base.DefaultMerger.Name
}
Expand Down
315 changes: 315 additions & 0 deletions sstable/testdata/writer_value_blocks
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,318 @@ layout
787 version: 4
791 magic number: 0xf09faab3f09faab3
799 EOF

# Show value block buffering of 2 causing groups of 2 val blocks in the middle.
build block-size=8 buf-limit=2
blue@10.SET.20:blue10
blue@8.SET.18:blue8
blue@8.SET.16:blue8s
blue@6.SET.15:blue6isverylong
blue@6.SET.14:blue6isverylong1
blue@6.SET.13:blue6isverylong1
blue@6.SET.12:blue6isverylong1
blue@6.SET.11:blue6isverylong1
----
value-blocks: num-values 7, num-blocks: 6, size: 149

layout
----
0 data (33)
0 record (25 = 3 [0] + 15 + 7) [restart]
blue@10#20,1:blue10
25 [restart 0]
33 [trailer compression=none checksum=0x5fb0d551]
38 data (29)
38 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#18,1:value handle {valueLen:5 blockNum:0 offsetInBlock:0}
59 [restart 38]
67 [trailer compression=none checksum=0x628e4a10]
72 data (29)
72 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#16,1:value handle {valueLen:6 blockNum:0 offsetInBlock:5}
93 [restart 72]
101 [trailer compression=none checksum=0x4e65b9b6]
106 data (29)
106 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#15,1:value handle {valueLen:15 blockNum:1 offsetInBlock:0}
127 [restart 106]
135 [trailer compression=none checksum=0xc992640e]
140 value-block (11)
156 value-block (15)
176 data (29)
176 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#14,1:value handle {valueLen:16 blockNum:2 offsetInBlock:0}
197 [restart 176]
205 [trailer compression=none checksum=0x62a8bb33]
210 data (29)
210 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#13,1:value handle {valueLen:16 blockNum:3 offsetInBlock:0}
231 [restart 210]
239 [trailer compression=none checksum=0xc0ab3808]
244 value-block (16)
265 value-block (16)
286 data (29)
286 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#12,1:value handle {valueLen:16 blockNum:4 offsetInBlock:0}
307 [restart 286]
315 [trailer compression=none checksum=0xec7ee24d]
320 data (29)
320 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#11,1:value handle {valueLen:16 blockNum:5 offsetInBlock:0}
341 [restart 320]
349 [trailer compression=none checksum=0xcca2bad9]
354 index (28)
354 block:0/33 [restart]
374 [restart 354]
382 [trailer compression=none checksum=0x32b37f08]
387 index (27)
387 block:38/29 [restart]
406 [restart 387]
414 [trailer compression=none checksum=0x21d27815]
419 index (30)
419 block:72/29 [restart]
441 [restart 419]
449 [trailer compression=none checksum=0xba0b26fe]
454 index (27)
454 block:106/29 [restart]
473 [restart 454]
481 [trailer compression=none checksum=0xf2c4e3d7]
486 index (31)
486 block:176/29 [restart]
509 [restart 486]
517 [trailer compression=none checksum=0x1c2b03b0]
522 index (31)
522 block:210/29 [restart]
545 [restart 522]
553 [trailer compression=none checksum=0x5121de43]
558 index (31)
558 block:286/29 [restart]
581 [restart 558]
589 [trailer compression=none checksum=0x5670ba6d]
594 index (26)
594 block:320/29 [restart]
612 [restart 594]
620 [trailer compression=none checksum=0x5d0eec20]
625 top-index (151)
625 block:354/28 [restart]
646 block:387/27 [restart]
666 block:419/30 [restart]
689 block:454/27 [restart]
709 block:486/31 [restart]
732 block:522/31 [restart]
755 block:558/31 [restart]
778 block:594/26 [restart]
796 [restart 625]
800 [restart 646]
804 [restart 666]
808 [restart 689]
812 [restart 709]
816 [restart 732]
820 [restart 755]
824 [restart 778]
776 [trailer compression=snappy checksum=0x1a8319dc]
781 value-block (16)
802 value-block (16)
823 value-index (24)
852 properties (678)
852 obsolete-key (16) [restart]
868 pebble.num.value-blocks (27)
895 pebble.num.values.in.value-blocks (21)
916 pebble.value-blocks.size (22)
938 rocksdb.block.based.table.index.type (43)
981 rocksdb.block.based.table.prefix.filtering (20)
1001 rocksdb.block.based.table.whole.key.filtering (23)
1024 rocksdb.comparator (37)
1061 rocksdb.compression (16)
1077 rocksdb.compression_options (106)
1183 rocksdb.data.size (14)
1197 rocksdb.deleted.keys (15)
1212 rocksdb.external_sst_file.global_seqno (41)
1253 rocksdb.external_sst_file.version (14)
1267 rocksdb.filter.size (15)
1282 rocksdb.index.partitions (20)
1302 rocksdb.index.size (9)
1311 rocksdb.merge.operands (18)
1329 rocksdb.merge.operator (24)
1353 rocksdb.num.data.blocks (19)
1372 rocksdb.num.entries (11)
1383 rocksdb.num.range-deletions (19)
1402 rocksdb.prefix.extractor.name (31)
1433 rocksdb.property.collectors (34)
1467 rocksdb.raw.key.size (16)
1483 rocksdb.raw.value.size (14)
1497 rocksdb.top-level.index.size (25)
1522 [restart 852]
1530 [trailer compression=none checksum=0xe690121f]
1535 meta-index (64)
1535 pebble.value_index block:823/24 value-blocks-index-lengths: 1(num), 2(offset), 1(length) [restart]
1562 rocksdb.properties block:852/678 [restart]
1587 [restart 1535]
1591 [restart 1562]
1599 [trailer compression=none checksum=0x98d2a4dd]
1604 footer (53)
1604 checksum type: crc32c
1605 meta: offset=1535, length=64
1608 index: offset=625, length=151
1612 [padding]
1645 version: 4
1649 magic number: 0xf09faab3f09faab3
1657 EOF

# Show val block buffering limit of 1 flushing every block.
build block-size=8 buf-limit=1
blue@10.SET.20:blue10
blue@8.SET.18:blue8
blue@8.SET.16:blue8s
blue@6.SET.15:blue6isverylong
blue@6.SET.14:blue6isverylong1
blue@6.SET.13:blue6isverylong1
blue@6.SET.12:blue6isverylong1
blue@6.SET.11:blue6isverylong1
----
value-blocks: num-values 7, num-blocks: 6, size: 149


layout
----
0 data (33)
0 record (25 = 3 [0] + 15 + 7) [restart]
blue@10#20,1:blue10
25 [restart 0]
33 [trailer compression=none checksum=0x5fb0d551]
38 data (29)
38 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#18,1:value handle {valueLen:5 blockNum:0 offsetInBlock:0}
59 [restart 38]
67 [trailer compression=none checksum=0x628e4a10]
72 data (29)
72 record (21 = 3 [0] + 14 + 4) [restart]
blue@8#16,1:value handle {valueLen:6 blockNum:0 offsetInBlock:5}
93 [restart 72]
101 [trailer compression=none checksum=0x4e65b9b6]
106 value-block (11)
122 data (29)
122 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#15,1:value handle {valueLen:15 blockNum:1 offsetInBlock:0}
143 [restart 122]
151 [trailer compression=none checksum=0xc992640e]
156 value-block (15)
176 data (29)
176 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#14,1:value handle {valueLen:16 blockNum:2 offsetInBlock:0}
197 [restart 176]
205 [trailer compression=none checksum=0x62a8bb33]
210 value-block (16)
231 data (29)
231 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#13,1:value handle {valueLen:16 blockNum:3 offsetInBlock:0}
252 [restart 231]
260 [trailer compression=none checksum=0xc0ab3808]
265 value-block (16)
286 data (29)
286 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#12,1:value handle {valueLen:16 blockNum:4 offsetInBlock:0}
307 [restart 286]
315 [trailer compression=none checksum=0xec7ee24d]
320 data (29)
320 record (21 = 3 [0] + 14 + 4) [restart]
blue@6#11,1:value handle {valueLen:16 blockNum:5 offsetInBlock:0}
341 [restart 320]
349 [trailer compression=none checksum=0xcca2bad9]
354 index (28)
354 block:0/33 [restart]
374 [restart 354]
382 [trailer compression=none checksum=0x32b37f08]
387 index (27)
387 block:38/29 [restart]
406 [restart 387]
414 [trailer compression=none checksum=0x21d27815]
419 index (30)
419 block:72/29 [restart]
441 [restart 419]
449 [trailer compression=none checksum=0xba0b26fe]
454 index (27)
454 block:122/29 [restart]
473 [restart 454]
481 [trailer compression=none checksum=0xcd162eb6]
486 index (31)
486 block:176/29 [restart]
509 [restart 486]
517 [trailer compression=none checksum=0x1c2b03b0]
522 index (31)
522 block:231/29 [restart]
545 [restart 522]
553 [trailer compression=none checksum=0xa8453ba7]
558 index (31)
558 block:286/29 [restart]
581 [restart 558]
589 [trailer compression=none checksum=0x5670ba6d]
594 index (26)
594 block:320/29 [restart]
612 [restart 594]
620 [trailer compression=none checksum=0x5d0eec20]
625 top-index (151)
625 block:354/28 [restart]
646 block:387/27 [restart]
666 block:419/30 [restart]
689 block:454/27 [restart]
709 block:486/31 [restart]
732 block:522/31 [restart]
755 block:558/31 [restart]
778 block:594/26 [restart]
796 [restart 625]
800 [restart 646]
804 [restart 666]
808 [restart 689]
812 [restart 709]
816 [restart 732]
820 [restart 755]
824 [restart 778]
776 [trailer compression=snappy checksum=0x1a8319dc]
781 value-block (16)
802 value-block (16)
823 value-index (24)
852 properties (678)
852 obsolete-key (16) [restart]
868 pebble.num.value-blocks (27)
895 pebble.num.values.in.value-blocks (21)
916 pebble.value-blocks.size (22)
938 rocksdb.block.based.table.index.type (43)
981 rocksdb.block.based.table.prefix.filtering (20)
1001 rocksdb.block.based.table.whole.key.filtering (23)
1024 rocksdb.comparator (37)
1061 rocksdb.compression (16)
1077 rocksdb.compression_options (106)
1183 rocksdb.data.size (14)
1197 rocksdb.deleted.keys (15)
1212 rocksdb.external_sst_file.global_seqno (41)
1253 rocksdb.external_sst_file.version (14)
1267 rocksdb.filter.size (15)
1282 rocksdb.index.partitions (20)
1302 rocksdb.index.size (9)
1311 rocksdb.merge.operands (18)
1329 rocksdb.merge.operator (24)
1353 rocksdb.num.data.blocks (19)
1372 rocksdb.num.entries (11)
1383 rocksdb.num.range-deletions (19)
1402 rocksdb.prefix.extractor.name (31)
1433 rocksdb.property.collectors (34)
1467 rocksdb.raw.key.size (16)
1483 rocksdb.raw.value.size (14)
1497 rocksdb.top-level.index.size (25)
1522 [restart 852]
1530 [trailer compression=none checksum=0xe690121f]
1535 meta-index (64)
1535 pebble.value_index block:823/24 value-blocks-index-lengths: 1(num), 2(offset), 1(length) [restart]
1562 rocksdb.properties block:852/678 [restart]
1587 [restart 1535]
1591 [restart 1562]
1599 [trailer compression=none checksum=0x98d2a4dd]
1604 footer (53)
1604 checksum type: crc32c
1605 meta: offset=1535, length=64
1608 index: offset=625, length=151
1612 [padding]
1645 version: 4
1649 magic number: 0xf09faab3f09faab3
1657 EOF
Loading

0 comments on commit 593fc13

Please sign in to comment.