From 53bcdf7e5d94b09bf31b6e2915a7aa51d1f50b36 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Thu, 19 Aug 2021 17:32:18 -0700 Subject: [PATCH] Fix EOF error for some files without final newline (#27) If a file was an exact multiple of 1024 bytes (the size of an internal buffer) and was missing a final newline, the LineReaderAt implementation would drop the last line, leading to an unexpected EOF error on apply. In addition to fixing the bug, slightly change the behavior of ReadLineAt to reflect how it is actually used: 1. Clarify that the return value n includes all lines instead of only lines with a final newline. This was already true except in the case of the bug fixed by this commit. 2. Only return io.EOF if fewer lines are read than requested. The previous implementation also returned io.EOF if the last line was missing a final newline, but this was confusing and didn't really serve a purpose. This is technically a breaking change for external implementations but an implementation that exactly followed the "spec" was already broken in certain edge cases. --- gitdiff/apply.go | 5 +---- gitdiff/io.go | 44 ++++++++++++++++++------------------ gitdiff/io_test.go | 56 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 77 insertions(+), 28 deletions(-) diff --git a/gitdiff/apply.go b/gitdiff/apply.go index 4397d99..05a4526 100644 --- a/gitdiff/apply.go +++ b/gitdiff/apply.go @@ -231,10 +231,7 @@ func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error { preimage := make([][]byte, fragEnd-start) n, err := a.lineSrc.ReadLinesAt(preimage, start) - switch { - case err == nil: - case err == io.EOF && n == len(preimage): // last line of frag has no newline character - default: + if err != nil { return applyError(err, lineNum(start+int64(n))) } diff --git a/gitdiff/io.go b/gitdiff/io.go index af5c847..8143238 100644 --- a/gitdiff/io.go +++ b/gitdiff/io.go @@ -5,21 +5,23 @@ import ( "io" ) +const ( + byteBufferSize = 32 * 1024 // from io.Copy + lineBufferSize = 32 + indexBufferSize = 1024 +) + // LineReaderAt is the interface that wraps the ReadLinesAt method. // -// ReadLinesAt reads len(lines) into lines starting at line offset in the -// input source. It returns number of full lines read (0 <= n <= len(lines)) -// and any error encountered. Line numbers are zero-indexed. +// ReadLinesAt reads len(lines) into lines starting at line offset. It returns +// the number of lines read (0 <= n <= len(lines)) and any error encountered. +// Line numbers are zero-indexed. // // If n < len(lines), ReadLinesAt returns a non-nil error explaining why more // lines were not returned. // -// Each full line includes the line ending character(s). If the last line of -// the input does not have a line ending character, ReadLinesAt returns the -// content of the line and io.EOF. -// -// If the content of the input source changes after the first call to -// ReadLinesAt, the behavior of future calls is undefined. +// Lines read by ReadLinesAt include the newline character. The last line does +// not have a final newline character if the input ends without one. type LineReaderAt interface { ReadLinesAt(lines [][]byte, offset int64) (n int, err error) } @@ -65,7 +67,7 @@ func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err err lines[n] = buf[start:end] } - if n < count || buf[len(buf)-1] != '\n' { + if n < count { return n, io.EOF } return n, nil @@ -75,13 +77,9 @@ func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err err // for line or a read returns io.EOF. It returns an error if and only if there // is an error reading data. func (r *lineReaderAt) indexTo(line int64) error { - var buf [1024]byte - - var offset int64 - if len(r.index) > 0 { - offset = r.index[len(r.index)-1] - } + var buf [indexBufferSize]byte + offset := r.lastOffset() for int64(len(r.index)) < line { n, err := r.r.ReadAt(buf[:], offset) if err != nil && err != io.EOF { @@ -94,7 +92,7 @@ func (r *lineReaderAt) indexTo(line int64) error { } } if err == io.EOF { - if n > 0 && buf[n-1] != '\n' { + if offset > r.lastOffset() { r.index = append(r.index, offset) } r.eof = true @@ -104,6 +102,13 @@ func (r *lineReaderAt) indexTo(line int64) error { return nil } +func (r *lineReaderAt) lastOffset() int64 { + if n := len(r.index); n > 0 { + return r.index[n-1] + } + return 0 +} + // readBytes reads the bytes of the n lines starting at line and returns the // bytes and the offset of the first byte in the underlying source. func (r *lineReaderAt) readBytes(line, n int64) (b []byte, offset int64, err error) { @@ -147,11 +152,6 @@ func isLen(r io.ReaderAt, n int64) (bool, error) { return false, err } -const ( - byteBufferSize = 32 * 1024 // from io.Copy - lineBufferSize = 32 -) - // copyFrom writes bytes starting from offset off in src to dst stopping at the // end of src or at the first error. copyFrom returns the number of bytes // written and any error. diff --git a/gitdiff/io_test.go b/gitdiff/io_test.go index 8d8a18b..bd242a7 100644 --- a/gitdiff/io_test.go +++ b/gitdiff/io_test.go @@ -9,6 +9,8 @@ import ( ) func TestLineReaderAt(t *testing.T) { + const lineTemplate = "generated test line %d\n" + tests := map[string]struct { InputLines int Offset int64 @@ -42,6 +44,11 @@ func TestLineReaderAt(t *testing.T) { Offset: 2, Count: 0, }, + "readAllLines": { + InputLines: 64, + Offset: 0, + Count: 64, + }, "readThroughEOF": { InputLines: 16, Offset: 12, @@ -71,8 +78,6 @@ func TestLineReaderAt(t *testing.T) { }, } - const lineTemplate = "generated test line %d\n" - for name, test := range tests { t.Run(name, func(t *testing.T) { var input bytes.Buffer @@ -114,6 +119,53 @@ func TestLineReaderAt(t *testing.T) { } }) } + + newlineTests := map[string]struct { + InputSize int + }{ + "readLinesNoFinalNewline": { + InputSize: indexBufferSize + indexBufferSize/2, + }, + "readLinesNoFinalNewlineBufferMultiple": { + InputSize: 4 * indexBufferSize, + }, + } + + for name, test := range newlineTests { + t.Run(name, func(t *testing.T) { + input := bytes.Repeat([]byte("0"), test.InputSize) + + var output [][]byte + for i := 0; i < len(input); i++ { + last := i + i += rand.Intn(80) + if i < len(input)-1 { // last character of input must not be a newline + input[i] = '\n' + output = append(output, input[last:i+1]) + } else { + output = append(output, input[last:]) + } + } + + r := &lineReaderAt{r: bytes.NewReader(input)} + lines := make([][]byte, len(output)) + + n, err := r.ReadLinesAt(lines, 0) + if err != nil { + t.Fatalf("unexpected error reading reading lines: %v", err) + } + + if n != len(output) { + t.Fatalf("incorrect number of lines read: expected %d, actual %d", len(output), n) + } + + for i, line := range lines { + if !bytes.Equal(output[i], line) { + t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], line) + } + } + }) + } } func TestCopyFrom(t *testing.T) {