Skip to content

Commit

Permalink
go/store/nbs: archive_build.go: Improve memory utilization of archive build by not preallocating a buffer for 'max' chunk size.
Browse files Browse the repository at this point in the history
  • Loading branch information
reltuk committed Jan 3, 2025
1 parent e3ff1f7 commit 41509e2
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
18 changes: 10 additions & 8 deletions go/store/nbs/archive_build.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,23 +225,25 @@ func convertTableFileToArchive(
// cg.print(n, p)
//}

const fourMb = 1 << 22

// Allocate buffer used to compress chunks.
cmpBuff := make([]byte, 0, maxChunkSize)
cmpBuff := make([]byte, 0, fourMb)

cmpDefDict := gozstd.Compress(cmpBuff, defaultDict)
cmpBuff = gozstd.Compress(cmpBuff[:0], defaultDict)
// p("Default Dict Raw vs Compressed: %d , %d\n", len(defaultDict), len(cmpDefDict))

arcW, err := newArchiveWriter()
if err != nil {
return "", hash.Hash{}, err
}
var defaultDictByteSpanId uint32
defaultDictByteSpanId, err = arcW.writeByteSpan(cmpDefDict)
defaultDictByteSpanId, err = arcW.writeByteSpan(cmpBuff)
if err != nil {
return "", hash.Hash{}, err
}

_, grouped, singles, err := writeDataToArchive(ctx, cmpBuff, allChunks, cgList, defaultDictByteSpanId, defaultCDict, arcW, progress, stats)
_, grouped, singles, err := writeDataToArchive(ctx, cmpBuff[:0], allChunks, cgList, defaultDictByteSpanId, defaultCDict, arcW, progress, stats)
if err != nil {
return "", hash.Hash{}, err
}
Expand Down Expand Up @@ -337,9 +339,9 @@ func writeDataToArchive(
if cg.totalBytesSavedWDict > cg.totalBytesSavedDefaultDict {
groupCount++

cmpDict := gozstd.Compress(cmpBuff, cg.dict)
cmpBuff = gozstd.Compress(cmpBuff[:0], cg.dict)

dictId, err := arcW.writeByteSpan(cmpDict)
dictId, err := arcW.writeByteSpan(cmpBuff)
if err != nil {
return 0, 0, 0, err
}
Expand All @@ -351,9 +353,9 @@ func writeDataToArchive(
}

if !arcW.chunkSeen(cs.chunkId) {
compressed := gozstd.CompressDict(cmpBuff, c.Data(), cg.cDict)
cmpBuff = gozstd.CompressDict(cmpBuff[:0], c.Data(), cg.cDict)

dataId, err := arcW.writeByteSpan(compressed)
dataId, err := arcW.writeByteSpan(cmpBuff)
if err != nil {
return 0, 0, 0, err
}
Expand Down
2 changes: 0 additions & 2 deletions integration-tests/bats/archive.bats
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,6 @@ mutations_and_gc_statement() {
dolt sql -q "$(update_statement)"
}

# The `dolt archive` invocation here gets "Killed" in lambda, maybe due to memory use? For now, skip.
# bats test_tags=no_lambda
@test "archive: multiple archives" {
dolt sql -q "$(mutations_and_gc_statement)"
dolt sql -q "$(mutations_and_gc_statement)"
Expand Down

0 comments on commit 41509e2

Please sign in to comment.