From 188f5bb314d333803ba0ce37f94dc7451ecdd9e1 Mon Sep 17 00:00:00 2001 From: Matt Joiner Date: Sat, 2 Mar 2024 13:00:28 +1100 Subject: [PATCH] Improve segment handling for discontiguous extents --- metainfo/file-tree.go | 15 ++++---- metainfo/info.go | 8 ++++- segments/index.go | 28 ++++++++++----- segments/segments.go | 45 +++++++++++++++-------- segments/segments_test.go | 14 +++++--- storage/file-misc.go | 46 +++++++++--------------- storage/file-misc_test.go | 75 +++++++++++++++++++++++++++++++++++---- 7 files changed, 159 insertions(+), 72 deletions(-) diff --git a/metainfo/file-tree.go b/metainfo/file-tree.go index 0d7e1caf3b..2b651bd446 100644 --- a/metainfo/file-tree.go +++ b/metainfo/file-tree.go @@ -11,12 +11,15 @@ import ( const FileTreePropertiesKey = "" +type FileTreeFile struct { + Length int64 `bencode:"length"` + PiecesRoot string `bencode:"pieces root"` +} + +// The fields here don't need bencode tags as the marshalling is done manually. type FileTree struct { - File struct { - Length int64 `bencode:"length"` - PiecesRoot string `bencode:"pieces root"` - } - Dir map[string]FileTree + File FileTreeFile + Dir map[string]FileTree } func (ft *FileTree) UnmarshalBencode(bytes []byte) (err error) { @@ -107,7 +110,7 @@ func (ft *FileTree) Walk(path []string, f func(path []string, ft *FileTree)) { } func (ft *FileTree) PiecesRootAsByteArray() (ret g.Option[[32]byte]) { - if ft.File.Length == 0 { + if ft.File.PiecesRoot == "" { return } n := copy(ret.Value[:], ft.File.PiecesRoot) diff --git a/metainfo/info.go b/metainfo/info.go index 8b8749c7c1..5d6300ec51 100644 --- a/metainfo/info.go +++ b/metainfo/info.go @@ -168,7 +168,13 @@ func (info *Info) UpvertedFiles() (files []FileInfo) { Path: nil, }} } - return info.Files + var offset int64 + for _, fi := range info.Files { + fi.TorrentOffset = offset + offset += fi.Length + files = append(files, fi) + } + return } func (info *Info) Piece(index int) Piece { diff --git a/segments/index.go b/segments/index.go index 888e90a813..d7ae744b12 100644 --- a/segments/index.go +++ b/segments/index.go @@ -1,6 +1,7 @@ package segments import ( + g "github.com/anacrolix/generics" "sort" ) @@ -21,15 +22,19 @@ func NewIndexFromSegments(segments []Extent) Index { return Index{segments} } -func (me Index) iterSegments() func() (Length, bool) { - return func() (Length, bool) { +func (me Index) iterSegments() func() (Extent, bool) { + var lastEnd g.Option[Int] + return func() (ret Extent, ok bool) { if len(me.segments) == 0 { - return 0, false - } else { - l := me.segments[0].Length - me.segments = me.segments[1:] - return l, true + return } + cur := me.segments[0] + me.segments = me.segments[1:] + ret.Start = cur.Start - lastEnd.UnwrapOr(cur.Start) + ret.Length = cur.Length + lastEnd.Set(cur.End()) + ok = true + return } } @@ -41,11 +46,16 @@ func (me Index) Locate(e Extent, output Callback) bool { return _e.End() > e.Start }) if first == len(me.segments) { - return false + return e.Length == 0 } e.Start -= me.segments[first].Start + // The extent is before the first segment. + if e.Start < 0 { + e.Length += e.Start + e.Start = 0 + } me.segments = me.segments[first:] - return Scan(me.iterSegments(), e, func(i int, e Extent) bool { + return ScanConsecutive(me.iterSegments(), e, func(i int, e Extent) bool { return output(i+first, e) }) } diff --git a/segments/segments.go b/segments/segments.go index 3870042589..35e7e4c8e5 100644 --- a/segments/segments.go +++ b/segments/segments.go @@ -13,33 +13,48 @@ func (e Extent) End() Int { } type ( - Callback = func(segmentIndex int, segmentBounds Extent) bool - LengthIter = func() (Length, bool) + Callback = func(segmentIndex int, segmentBounds Extent) bool + LengthIter = func() (Length, bool) + ConsecutiveExtentIter = func() (Extent, bool) ) // Returns true if callback returns false early, or all segments in the haystack for the needle are // found. func Scan(haystack LengthIter, needle Extent, callback Callback) bool { + return ScanConsecutive( + func() (Extent, bool) { + l, ok := haystack() + return Extent{0, l}, ok + }, + needle, + callback, + ) +} + +// Returns true if callback returns false early, or all segments in the haystack for the needle are +// found. +func ScanConsecutive(haystack ConsecutiveExtentIter, needle Extent, callback Callback) bool { i := 0 + // Extents have been found in the haystack and we're waiting for the needle to end. This is kind + // of for backwards compatibility for some tests that expect to have zero-length extents. + startedNeedle := false for needle.Length != 0 { l, ok := haystack() if !ok { return false } - if needle.Start < l || needle.Start == l && l == 0 { - e1 := Extent{ - Start: needle.Start, - Length: min(l, needle.End()) - needle.Start, - } - if e1.Length >= 0 { - if !callback(i, e1) { - return true - } - needle.Start = 0 - needle.Length -= e1.Length + + e1 := Extent{ + Start: max(needle.Start-l.Start, 0), + } + e1.Length = max(min(l.Length, needle.End()-l.Start)-e1.Start, 0) + needle.Start = max(0, needle.Start-l.End()) + needle.Length -= e1.Length + l.Start + if e1.Length > 0 || (startedNeedle && needle.Length != 0) { + if !callback(i, e1) { + return true } - } else { - needle.Start -= l + startedNeedle = true } i++ } diff --git a/segments/segments_test.go b/segments/segments_test.go index 9ce9164bf7..e757654f9d 100644 --- a/segments/segments_test.go +++ b/segments/segments_test.go @@ -1,9 +1,8 @@ package segments import ( + qt "github.com/frankban/quicktest" "testing" - - "github.com/stretchr/testify/assert" ) func LengthIterFromSlice(ls []Length) LengthIter { @@ -36,14 +35,21 @@ func (me *collectExtents) scanCallback(i int, e Extent) bool { type newLocater func(LengthIter) Locater -func assertLocate(t *testing.T, nl newLocater, ls []Length, needle Extent, firstExpectedIndex int, expectedExtents []Extent) { +func assertLocate( + t *testing.T, + nl newLocater, + ls []Length, + needle Extent, + firstExpectedIndex int, + expectedExtents []Extent, +) { var actual collectExtents var expected collectExtents for i, e := range expectedExtents { expected.scanCallback(firstExpectedIndex+i, e) } nl(LengthIterFromSlice(ls))(needle, actual.scanCallback) - assert.EqualValues(t, expected, actual) + qt.Check(t, actual, qt.DeepEquals, expected) } func testLocater(t *testing.T, newLocater newLocater) { diff --git a/storage/file-misc.go b/storage/file-misc.go index 8966ecbd1a..67514420f0 100644 --- a/storage/file-misc.go +++ b/storage/file-misc.go @@ -1,34 +1,20 @@ package storage -import "github.com/anacrolix/torrent/metainfo" +import ( + "github.com/anacrolix/torrent/segments" +) -type requiredLength struct { - fileIndex int - length int64 -} - -func extentCompleteRequiredLengths(info *metainfo.Info, off, n int64) (ret []requiredLength) { - if n == 0 { - return - } - for i, fi := range info.UpvertedFiles() { - if off >= fi.Length { - off -= fi.Length - continue - } - n1 := n - if off+n1 > fi.Length { - n1 = fi.Length - off - } - ret = append(ret, requiredLength{ - fileIndex: i, - length: off + n1, - }) - n -= n1 - if n == 0 { - return - } - off = 0 - } - panic("extent exceeds torrent bounds") +// Returns the minimum file lengths required for the given extent to exist on disk. Returns false if +// the extent is not covered by the files in the index. +func minFileLengthsForTorrentExtent( + fileSegmentsIndex segments.Index, + off, n int64, + each func(fileIndex int, length int64) bool, +) bool { + return fileSegmentsIndex.Locate(segments.Extent{ + Start: off, + Length: n, + }, func(fileIndex int, segmentBounds segments.Extent) bool { + return each(fileIndex, segmentBounds.Start+segmentBounds.Length) + }) } diff --git a/storage/file-misc_test.go b/storage/file-misc_test.go index f74196d047..9e97ce00ec 100644 --- a/storage/file-misc_test.go +++ b/storage/file-misc_test.go @@ -1,6 +1,9 @@ package storage import ( + "github.com/anacrolix/torrent/common" + "github.com/anacrolix/torrent/segments" + qt "github.com/frankban/quicktest" "testing" "github.com/stretchr/testify/assert" @@ -8,6 +11,60 @@ import ( "github.com/anacrolix/torrent/metainfo" ) +type requiredLength struct { + FileIndex int + Length int64 +} + +// The required file indices and file lengths for the given extent to be "complete". This is the +// outdated interface used by some tests. +func extentCompleteRequiredLengths(info *metainfo.Info, off, n int64) (ret []requiredLength) { + index := segments.NewIndexFromSegments(common.TorrentOffsetFileSegments(info)) + minFileLengthsForTorrentExtent(index, off, n, func(fileIndex int, length int64) bool { + ret = append(ret, requiredLength{fileIndex, length}) + return true + }) + return +} + +func TestExtentCompleteRequiredLengthsV2InfoWithGaps(t *testing.T) { + info := &metainfo.Info{ + MetaVersion: 2, + PieceLength: 2, + FileTree: metainfo.FileTree{ + Dir: map[string]metainfo.FileTree{ + "a": { + File: metainfo.FileTreeFile{ + Length: 2, + }, + }, + "b": { + File: metainfo.FileTreeFile{Length: 3}, + }, + // Here there's a gap where v2 torrents piece align, so the next file offset starts + // at 6. + "c": { + File: metainfo.FileTreeFile{Length: 4}, + }, + }, + }, + } + c := qt.New(t) + check := func(off, n int64, expected ...requiredLength) { + c.Check(extentCompleteRequiredLengths(info, off, n), qt.DeepEquals, expected) + } + check(0, 0) + check(0, 1, requiredLength{FileIndex: 0, Length: 1}) + check(0, 2, requiredLength{FileIndex: 0, Length: 2}) + check(0, 3, requiredLength{FileIndex: 0, Length: 2}, requiredLength{FileIndex: 1, Length: 1}) + check(2, 2, requiredLength{FileIndex: 1, Length: 2}) + check(4, 1, requiredLength{FileIndex: 1, Length: 3}) + check(5, 0) + check(4, 2, requiredLength{FileIndex: 1, Length: 3}) + check(5, 1) + check(6, 4, requiredLength{FileIndex: 2, Length: 4}) +} + func TestExtentCompleteRequiredLengths(t *testing.T) { info := &metainfo.Info{ Files: []metainfo.FileInfo{ @@ -15,23 +72,27 @@ func TestExtentCompleteRequiredLengths(t *testing.T) { {Path: []string{"b"}, Length: 3}, }, } + c := qt.New(t) + check := func(off, n int64, expected ...requiredLength) { + c.Check(extentCompleteRequiredLengths(info, off, n), qt.DeepEquals, expected) + } assert.Empty(t, extentCompleteRequiredLengths(info, 0, 0)) assert.EqualValues(t, []requiredLength{ - {fileIndex: 0, length: 1}, + {FileIndex: 0, Length: 1}, }, extentCompleteRequiredLengths(info, 0, 1)) assert.EqualValues(t, []requiredLength{ - {fileIndex: 0, length: 2}, + {FileIndex: 0, Length: 2}, }, extentCompleteRequiredLengths(info, 0, 2)) assert.EqualValues(t, []requiredLength{ - {fileIndex: 0, length: 2}, - {fileIndex: 1, length: 1}, + {FileIndex: 0, Length: 2}, + {FileIndex: 1, Length: 1}, }, extentCompleteRequiredLengths(info, 0, 3)) assert.EqualValues(t, []requiredLength{ - {fileIndex: 1, length: 2}, + {FileIndex: 1, Length: 2}, }, extentCompleteRequiredLengths(info, 2, 2)) assert.EqualValues(t, []requiredLength{ - {fileIndex: 1, length: 3}, + {FileIndex: 1, Length: 3}, }, extentCompleteRequiredLengths(info, 4, 1)) assert.Len(t, extentCompleteRequiredLengths(info, 5, 0), 0) - assert.Panics(t, func() { extentCompleteRequiredLengths(info, 6, 1) }) + check(6, 1) }