fs-v1-multipart: Remove backgroundAppend operation
The backgroundAppend operation was meant as a performance optimization: uploaded
chunks would be aggregated into a complete file in the background while further
parts were still being uploaded.

This might be an optimization on actual filesystem paths, but on Gluster it
made the operation a lot slower, because many redundant filesystem calls were
executed. The CompleteMultipartUpload request, which requires the uploaded
chunks to be put together, has to (see the sketch after this list)

1. wait for any ongoing backgroundAppend operations to complete,
2. enumerate all chunks and check if they were put together in the right
   order (otherwise start from scratch),
3. move the aggregated file.
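
To make step 2 concrete, here is a minimal, hypothetical sketch (not MinIO's
actual code; all names are invented) of the decision the completion path has to
make: the aggregate built by backgroundAppend is only reusable if it is a
prefix of the part order the client requests, otherwise aggregation restarts
from scratch.

package main

import "fmt"

// canReuseAggregate reports whether the background-built aggregate can be
// extended instead of rebuilt. appendedParts is the order in which parts were
// appended so far; requestedParts is the order requested in
// CompleteMultipartUpload.
func canReuseAggregate(appendedParts, requestedParts []int) bool {
	if len(appendedParts) > len(requestedParts) {
		return false
	}
	for i, p := range appendedParts {
		if requestedParts[i] != p {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(canReuseAggregate([]int{1, 2, 3}, []int{1, 2, 3, 4})) // true: only part 4 still needs appending
	fmt.Println(canReuseAggregate([]int{1, 3}, []int{1, 2, 3}))       // false: start from scratch
}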

Removing this piece of code started out as an experiment, because I expected
the chunks not to be aggregated at all. It turned out that there is a fallback,
which is necessary anyway in case the final object should use a different part
order or omit some of the uploaded parts, and this fallback also ensures that a
final aggregate is created. At least on GlusterFS this makes any
CompleteMultipartUpload (CMU) request run almost twice as fast as with
backgroundAppend.
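
For illustration, a rough sketch of the completion-time aggregation that now
always runs; readPart and appendTo are hypothetical stand-ins for the real disk
calls (reading a part file, appending it to a temporary aggregate) and not
MinIO's actual API.

package main

import "fmt"

// aggregateParts builds the final object in a single sequential pass over the
// parts the client requested; there is no prior backgroundAppend state to
// wait on, verify, or throw away.
func aggregateParts(requestedParts []int,
	readPart func(partNumber int) ([]byte, error),
	appendTo func(buf []byte) error) error {
	for _, partNumber := range requestedParts {
		buf, err := readPart(partNumber)
		if err != nil {
			return err
		}
		if err := appendTo(buf); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	var aggregate []byte
	err := aggregateParts([]int{1, 2, 3},
		func(n int) ([]byte, error) { return []byte(fmt.Sprintf("part-%d|", n)), nil },
		func(buf []byte) error { aggregate = append(aggregate, buf...); return nil },
	)
	fmt.Println(string(aggregate), err) // part-1|part-2|part-3| <nil>
}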

(cherry picked from commit e61a333)
Robert Lützner authored and rluetzner committed Apr 20, 2022
1 parent e78146a commit 96165ac
cmd/fs-v1-multipart.go: 0 additions, 60 deletions
@@ -62,63 +62,6 @@ func (fs *FSObjects) decodePartFile(name string) (partNumber int, etag string, a
 	return partNumber, result[1], actualSize, nil
 }

-// Appends parts to an appendFile sequentially.
-func (fs *FSObjects) backgroundAppend(ctx context.Context, bucket, object, uploadID string) {
-	fs.appendFileMapMu.Lock()
-	logger.GetReqInfo(ctx).AppendTags("uploadID", uploadID)
-	file := fs.appendFileMap[uploadID]
-	if file == nil {
-		file = &fsAppendFile{
-			filePath: fmt.Sprintf("%s.%s", uploadID, mustGetUUID()),
-		}
-		fs.appendFileMap[uploadID] = file
-	}
-	fs.appendFileMapMu.Unlock()
-
-	file.Lock()
-	defer file.Unlock()
-
-	// Since we append sequentially nextPartNumber will always be len(file.parts)+1
-	nextPartNumber := len(file.parts) + 1
-	uploadIDPath := fs.getUploadIDDir(bucket, object, uploadID)
-	parts, err := fs.getRemainingUploadedChunks(ctx, bucket, object, uploadID, nextPartNumber)
-	if err != nil {
-		logger.LogIf(ctx, err)
-		return
-	}
-
-	for _, part := range parts {
-		partNumber, etag, actualSize := part.Number, part.ETag, part.ActualSize
-		if partNumber < nextPartNumber {
-			// Part already appended.
-			continue
-		}
-		if partNumber > nextPartNumber {
-			// Required part number is not yet uploaded.
-			return
-		}
-
-		partPath := pathJoin(uploadIDPath, fs.encodePartFile(partNumber, etag, actualSize))
-		entryBuf, err := fs.disk.ReadAll(ctx, minioMetaMultipartBucket, partPath)
-		if err != nil {
-			reqInfo := logger.GetReqInfo(ctx).AppendTags("partPath", partPath)
-			reqInfo.AppendTags("filepath", file.filePath)
-			logger.LogIf(ctx, err)
-			return
-		}
-		err = fs.disk.AppendFile(ctx, fs.disk.MetaTmpBucket(), file.filePath, entryBuf)
-		if err != nil {
-			reqInfo := logger.GetReqInfo(ctx).AppendTags("partPath", partPath)
-			reqInfo.AppendTags("filepath", file.filePath)
-			logger.LogIf(ctx, err)
-			return
-		}
-
-		file.parts = append(file.parts, partNumber)
-		nextPartNumber++
-	}
-}
-
 // Get uploaded chunks from multipart folder in metabucket
 func (fs *FSObjects) getRemainingUploadedChunks(ctx context.Context, bucket, object, uploadID string, nextPartNumber int) ([]ObjectPartInfo, error) {
 	// Setting count to -1 will read everything.
@@ -442,8 +385,6 @@ func (fs *FSObjects) PutObjectPart(rctx context.Context, bucket, object, uploadI
 		return pi, toObjectErr(err, minioMetaMultipartBucket, partPath)
 	}

-	go fs.backgroundAppend(wctx, bucket, object, uploadID)
-
 	return PartInfo{
 		PartNumber:   partID,
 		LastModified: UTCNow(),
@@ -701,7 +642,6 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
 	// 1. The last PutObjectPart triggers go-routine fs.backgroundAppend, this go-routine has not started yet.
 	// 2. Now CompleteMultipartUpload gets called which sees that lastPart is not appended and starts appending
 	//    from the beginning
-	fs.backgroundAppend(ctx, bucket, object, uploadID)

 	fs.appendFileMapMu.Lock()
 	file := fs.appendFileMap[uploadID]
