-
-
Notifications
You must be signed in to change notification settings - Fork 525
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
go/store/{nbs,types}: GC: Move the reference walk from types to nbs. #8752
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1598,11 +1598,11 @@ func (nbs *NomsBlockStore) EndGC() { | |
nbs.cond.Broadcast() | ||
} | ||
|
||
func (nbs *NomsBlockStore) MarkAndSweepChunks(ctx context.Context, hashes <-chan []hash.Hash, dest chunks.ChunkStore, mode chunks.GCMode) (chunks.GCFinalizer, error) { | ||
return markAndSweepChunks(ctx, hashes, nbs, nbs, dest, mode) | ||
func (nbs *NomsBlockStore) MarkAndSweepChunks(ctx context.Context, getAddrs chunks.GetAddrsCurry, filter chunks.HasManyFunc, dest chunks.ChunkStore, mode chunks.GCMode) (chunks.MarkAndSweeper, error) { | ||
return markAndSweepChunks(ctx, nbs, nbs, dest, getAddrs, filter, mode) | ||
} | ||
|
||
func markAndSweepChunks(ctx context.Context, hashes <-chan []hash.Hash, nbs *NomsBlockStore, src NBSCompressedChunkStore, dest chunks.ChunkStore, mode chunks.GCMode) (chunks.GCFinalizer, error) { | ||
func markAndSweepChunks(ctx context.Context, nbs *NomsBlockStore, src NBSCompressedChunkStore, dest chunks.ChunkStore, getAddrs chunks.GetAddrsCurry, filter chunks.HasManyFunc, mode chunks.GCMode) (chunks.MarkAndSweeper, error) { | ||
ops := nbs.SupportedOperations() | ||
if !ops.CanGC || !ops.CanPrune { | ||
return nil, chunks.ErrUnsupportedOperation | ||
|
@@ -1647,18 +1647,127 @@ func markAndSweepChunks(ctx context.Context, hashes <-chan []hash.Hash, nbs *Nom | |
destNBS = nbs | ||
} | ||
|
||
specs, err := copyMarkedChunks(ctx, hashes, src, destNBS) | ||
tfp, ok := destNBS.p.(tableFilePersister) | ||
if !ok { | ||
return nil, fmt.Errorf("NBS does not support copying garbage collection") | ||
} | ||
|
||
gcc, err := newGarbageCollectionCopier() | ||
if err != nil { | ||
return nil, err | ||
} | ||
if ctx.Err() != nil { | ||
return nil, ctx.Err() | ||
|
||
return &markAndSweeper{ | ||
src: src, | ||
dest: destNBS, | ||
getAddrs: getAddrs, | ||
filter: filter, | ||
visited: make(hash.HashSet), | ||
tfp: tfp, | ||
gcc: gcc, | ||
mode: mode, | ||
}, nil | ||
} | ||
|
||
type markAndSweeper struct { | ||
src NBSCompressedChunkStore | ||
dest *NomsBlockStore | ||
getAddrs chunks.GetAddrsCurry | ||
filter chunks.HasManyFunc | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

I'm hazy on what `filter` does — when would we discard hashes?

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Great question. It's used for generational GC. So, when we collect newgen -> oldgen, we're walking refs and we want to stop the walk any time we walk into the old gen. Then, after those chunks are in the old gen, when we collect newgen -> newgen, we want to stop the walk once again any time we walk into the old gen. |
||
|
||
visited hash.HashSet | ||
|
||
tfp tableFilePersister | ||
gcc *gcCopier | ||
mode chunks.GCMode | ||
} | ||
|
||
func (i *markAndSweeper) SaveHashes(ctx context.Context, hashes []hash.Hash) error { | ||
toVisit := make(hash.HashSet, len(hashes)) | ||
for _, h := range hashes { | ||
if _, ok := i.visited[h]; !ok { | ||
toVisit.Insert(h) | ||
} | ||
} | ||
var err error | ||
var mu sync.Mutex | ||
first := true | ||
for { | ||
if !first { | ||
copy := toVisit.Copy() | ||
for h := range toVisit { | ||
if _, ok := i.visited[h]; ok { | ||
delete(copy, h) | ||
} | ||
} | ||
toVisit = copy | ||
} | ||
|
||
toVisit, err = i.filter(ctx, toVisit) | ||
if err != nil { | ||
return err | ||
} | ||
if len(toVisit) == 0 { | ||
break | ||
} | ||
|
||
first = false | ||
nextToVisit := make(hash.HashSet) | ||
|
||
found := 0 | ||
var addErr error | ||
err = i.src.GetManyCompressed(ctx, toVisit, func(ctx context.Context, cc CompressedChunk) { | ||
mu.Lock() | ||
defer mu.Unlock() | ||
if addErr != nil { | ||
return | ||
} | ||
found += 1 | ||
if cc.IsGhost() { | ||
// Ghost chunks encountered on the walk can be left alone --- they | ||
// do not bring their dependencies, and because of how generational | ||
// store works, they will still be ghost chunks | ||
// in the store after the GC is finished. | ||
return | ||
} | ||
addErr = i.gcc.addChunk(ctx, cc) | ||
if addErr != nil { | ||
return | ||
} | ||
c, err := cc.ToChunk() | ||
if err != nil { | ||
addErr = err | ||
return | ||
} | ||
addErr = i.getAddrs(c)(ctx, nextToVisit, func(h hash.Hash) bool { return false }) | ||
}) | ||
if err != nil { | ||
return err | ||
} | ||
if addErr != nil { | ||
return addErr | ||
} | ||
if found != len(toVisit) { | ||
return fmt.Errorf("dangling references requested during GC. GC not successful. %v", toVisit) | ||
} | ||
|
||
i.visited.InsertAll(toVisit) | ||
|
||
toVisit = nextToVisit | ||
} | ||
return nil | ||
} | ||
|
||
func (i *markAndSweeper) Close(ctx context.Context) (chunks.GCFinalizer, error) { | ||
specs, err := i.gcc.copyTablesToDir(ctx, i.tfp) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return gcFinalizer{ | ||
nbs: destNBS, | ||
nbs: i.dest, | ||
specs: specs, | ||
mode: mode, | ||
mode: i.mode, | ||
}, nil | ||
} | ||
|
||
|
@@ -1684,55 +1793,6 @@ func (gcf gcFinalizer) SwapChunksInStore(ctx context.Context) error { | |
return gcf.nbs.swapTables(ctx, gcf.specs, gcf.mode) | ||
} | ||
|
||
func copyMarkedChunks(ctx context.Context, keepChunks <-chan []hash.Hash, src NBSCompressedChunkStore, dest *NomsBlockStore) ([]tableSpec, error) { | ||
tfp, ok := dest.p.(tableFilePersister) | ||
if !ok { | ||
return nil, fmt.Errorf("NBS does not support copying garbage collection") | ||
} | ||
|
||
gcc, err := newGarbageCollectionCopier() | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// TODO: We should clean up gcc on error. | ||
|
||
LOOP: | ||
for { | ||
select { | ||
case hs, ok := <-keepChunks: | ||
if !ok { | ||
break LOOP | ||
} | ||
var addErr error | ||
mu := new(sync.Mutex) | ||
hashset := hash.NewHashSet(hs...) | ||
found := 0 | ||
err := src.GetManyCompressed(ctx, hashset, func(ctx context.Context, c CompressedChunk) { | ||
mu.Lock() | ||
defer mu.Unlock() | ||
if addErr != nil { | ||
return | ||
} | ||
found += 1 | ||
addErr = gcc.addChunk(ctx, c) | ||
}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
if addErr != nil { | ||
return nil, addErr | ||
} | ||
if found != len(hashset) { | ||
return nil, fmt.Errorf("dangling references requested during GC. GC not successful. %v", hashset) | ||
} | ||
case <-ctx.Done(): | ||
return nil, ctx.Err() | ||
} | ||
} | ||
return gcc.copyTablesToDir(ctx, tfp) | ||
} | ||
|
||
func (nbs *NomsBlockStore) IterateAllChunks(ctx context.Context, cb func(chunk chunks.Chunk)) error { | ||
for _, v := range nbs.tables.novel { | ||
err := v.iterateAllChunks(ctx, cb) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Somewhat unrelated, but if this embedded `tfp`, their relationship might be clearer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good suggestion! I'll take a pass and potentially send out a separate PR :)