Skip to content

Commit

Permalink
Fix EOF error for some files without final newline (#27)
Browse files Browse the repository at this point in the history
If a file was an exact multiple of 1024 bytes (the size of an internal
buffer) and was missing a final newline, the LineReaderAt implementation
would drop the last line, leading to an unexpected EOF error on apply.

In addition to fixing the bug, slightly change the behavior of
ReadLineAt to reflect how it is actually used:

  1. Clarify that the return value n includes all lines instead of only
     lines with a final newline. This was already true except in the
     case of the bug fixed by this commit.

  2. Only return io.EOF if fewer lines are read than requested. The
     previous implementation also returned io.EOF if the last line was
     missing a final newline, but this was confusing and didn't really
     serve a purpose.

This is technically a breaking change for external implementations but
an implementation that exactly followed the "spec" was already broken in
certain edge cases.
  • Loading branch information
bluekeyes authored Aug 20, 2021
1 parent b575654 commit 53bcdf7
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 28 deletions.
5 changes: 1 addition & 4 deletions gitdiff/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,7 @@ func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error {

preimage := make([][]byte, fragEnd-start)
n, err := a.lineSrc.ReadLinesAt(preimage, start)
switch {
case err == nil:
case err == io.EOF && n == len(preimage): // last line of frag has no newline character
default:
if err != nil {
return applyError(err, lineNum(start+int64(n)))
}

Expand Down
44 changes: 22 additions & 22 deletions gitdiff/io.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,23 @@ import (
"io"
)

const (
byteBufferSize = 32 * 1024 // from io.Copy
lineBufferSize = 32
indexBufferSize = 1024
)

// LineReaderAt is the interface that wraps the ReadLinesAt method.
//
// ReadLinesAt reads len(lines) into lines starting at line offset in the
// input source. It returns number of full lines read (0 <= n <= len(lines))
// and any error encountered. Line numbers are zero-indexed.
// ReadLinesAt reads len(lines) into lines starting at line offset. It returns
// the number of lines read (0 <= n <= len(lines)) and any error encountered.
// Line numbers are zero-indexed.
//
// If n < len(lines), ReadLinesAt returns a non-nil error explaining why more
// lines were not returned.
//
// Each full line includes the line ending character(s). If the last line of
// the input does not have a line ending character, ReadLinesAt returns the
// content of the line and io.EOF.
//
// If the content of the input source changes after the first call to
// ReadLinesAt, the behavior of future calls is undefined.
// Lines read by ReadLinesAt include the newline character. The last line does
// not have a final newline character if the input ends without one.
type LineReaderAt interface {
ReadLinesAt(lines [][]byte, offset int64) (n int, err error)
}
Expand Down Expand Up @@ -65,7 +67,7 @@ func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err err
lines[n] = buf[start:end]
}

if n < count || buf[len(buf)-1] != '\n' {
if n < count {
return n, io.EOF
}
return n, nil
Expand All @@ -75,13 +77,9 @@ func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err err
// for line or a read returns io.EOF. It returns an error if and only if there
// is an error reading data.
func (r *lineReaderAt) indexTo(line int64) error {
var buf [1024]byte

var offset int64
if len(r.index) > 0 {
offset = r.index[len(r.index)-1]
}
var buf [indexBufferSize]byte

offset := r.lastOffset()
for int64(len(r.index)) < line {
n, err := r.r.ReadAt(buf[:], offset)
if err != nil && err != io.EOF {
Expand All @@ -94,7 +92,7 @@ func (r *lineReaderAt) indexTo(line int64) error {
}
}
if err == io.EOF {
if n > 0 && buf[n-1] != '\n' {
if offset > r.lastOffset() {
r.index = append(r.index, offset)
}
r.eof = true
Expand All @@ -104,6 +102,13 @@ func (r *lineReaderAt) indexTo(line int64) error {
return nil
}

func (r *lineReaderAt) lastOffset() int64 {
if n := len(r.index); n > 0 {
return r.index[n-1]
}
return 0
}

// readBytes reads the bytes of the n lines starting at line and returns the
// bytes and the offset of the first byte in the underlying source.
func (r *lineReaderAt) readBytes(line, n int64) (b []byte, offset int64, err error) {
Expand Down Expand Up @@ -147,11 +152,6 @@ func isLen(r io.ReaderAt, n int64) (bool, error) {
return false, err
}

const (
byteBufferSize = 32 * 1024 // from io.Copy
lineBufferSize = 32
)

// copyFrom writes bytes starting from offset off in src to dst stopping at the
// end of src or at the first error. copyFrom returns the number of bytes
// written and any error.
Expand Down
56 changes: 54 additions & 2 deletions gitdiff/io_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
)

func TestLineReaderAt(t *testing.T) {
const lineTemplate = "generated test line %d\n"

tests := map[string]struct {
InputLines int
Offset int64
Expand Down Expand Up @@ -42,6 +44,11 @@ func TestLineReaderAt(t *testing.T) {
Offset: 2,
Count: 0,
},
"readAllLines": {
InputLines: 64,
Offset: 0,
Count: 64,
},
"readThroughEOF": {
InputLines: 16,
Offset: 12,
Expand Down Expand Up @@ -71,8 +78,6 @@ func TestLineReaderAt(t *testing.T) {
},
}

const lineTemplate = "generated test line %d\n"

for name, test := range tests {
t.Run(name, func(t *testing.T) {
var input bytes.Buffer
Expand Down Expand Up @@ -114,6 +119,53 @@ func TestLineReaderAt(t *testing.T) {
}
})
}

newlineTests := map[string]struct {
InputSize int
}{
"readLinesNoFinalNewline": {
InputSize: indexBufferSize + indexBufferSize/2,
},
"readLinesNoFinalNewlineBufferMultiple": {
InputSize: 4 * indexBufferSize,
},
}

for name, test := range newlineTests {
t.Run(name, func(t *testing.T) {
input := bytes.Repeat([]byte("0"), test.InputSize)

var output [][]byte
for i := 0; i < len(input); i++ {
last := i
i += rand.Intn(80)
if i < len(input)-1 { // last character of input must not be a newline
input[i] = '\n'
output = append(output, input[last:i+1])
} else {
output = append(output, input[last:])
}
}

r := &lineReaderAt{r: bytes.NewReader(input)}
lines := make([][]byte, len(output))

n, err := r.ReadLinesAt(lines, 0)
if err != nil {
t.Fatalf("unexpected error reading reading lines: %v", err)
}

if n != len(output) {
t.Fatalf("incorrect number of lines read: expected %d, actual %d", len(output), n)
}

for i, line := range lines {
if !bytes.Equal(output[i], line) {
t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], line)
}
}
})
}
}

func TestCopyFrom(t *testing.T) {
Expand Down

0 comments on commit 53bcdf7

Please sign in to comment.