diff --git a/gitdiff/apply.go b/gitdiff/apply.go index 4397d99..05a4526 100644 --- a/gitdiff/apply.go +++ b/gitdiff/apply.go @@ -231,10 +231,7 @@ func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error { preimage := make([][]byte, fragEnd-start) n, err := a.lineSrc.ReadLinesAt(preimage, start) - switch { - case err == nil: - case err == io.EOF && n == len(preimage): // last line of frag has no newline character - default: + if err != nil { return applyError(err, lineNum(start+int64(n))) } diff --git a/gitdiff/io.go b/gitdiff/io.go index af5c847..8143238 100644 --- a/gitdiff/io.go +++ b/gitdiff/io.go @@ -5,21 +5,23 @@ import ( "io" ) +const ( + byteBufferSize = 32 * 1024 // from io.Copy + lineBufferSize = 32 + indexBufferSize = 1024 +) + // LineReaderAt is the interface that wraps the ReadLinesAt method. // -// ReadLinesAt reads len(lines) into lines starting at line offset in the -// input source. It returns number of full lines read (0 <= n <= len(lines)) -// and any error encountered. Line numbers are zero-indexed. +// ReadLinesAt reads len(lines) into lines starting at line offset. It returns +// the number of lines read (0 <= n <= len(lines)) and any error encountered. +// Line numbers are zero-indexed. // // If n < len(lines), ReadLinesAt returns a non-nil error explaining why more // lines were not returned. // -// Each full line includes the line ending character(s). If the last line of -// the input does not have a line ending character, ReadLinesAt returns the -// content of the line and io.EOF. -// -// If the content of the input source changes after the first call to -// ReadLinesAt, the behavior of future calls is undefined. +// Lines read by ReadLinesAt include the newline character. The last line does +// not have a final newline character if the input ends without one. type LineReaderAt interface { ReadLinesAt(lines [][]byte, offset int64) (n int, err error) } @@ -65,7 +67,7 @@ func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err err lines[n] = buf[start:end] } - if n < count || buf[len(buf)-1] != '\n' { + if n < count { return n, io.EOF } return n, nil @@ -75,13 +77,9 @@ func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err err // for line or a read returns io.EOF. It returns an error if and only if there // is an error reading data. func (r *lineReaderAt) indexTo(line int64) error { - var buf [1024]byte - - var offset int64 - if len(r.index) > 0 { - offset = r.index[len(r.index)-1] - } + var buf [indexBufferSize]byte + offset := r.lastOffset() for int64(len(r.index)) < line { n, err := r.r.ReadAt(buf[:], offset) if err != nil && err != io.EOF { @@ -94,7 +92,7 @@ func (r *lineReaderAt) indexTo(line int64) error { } } if err == io.EOF { - if n > 0 && buf[n-1] != '\n' { + if offset > r.lastOffset() { r.index = append(r.index, offset) } r.eof = true @@ -104,6 +102,13 @@ func (r *lineReaderAt) indexTo(line int64) error { return nil } +func (r *lineReaderAt) lastOffset() int64 { + if n := len(r.index); n > 0 { + return r.index[n-1] + } + return 0 +} + // readBytes reads the bytes of the n lines starting at line and returns the // bytes and the offset of the first byte in the underlying source. func (r *lineReaderAt) readBytes(line, n int64) (b []byte, offset int64, err error) { @@ -147,11 +152,6 @@ func isLen(r io.ReaderAt, n int64) (bool, error) { return false, err } -const ( - byteBufferSize = 32 * 1024 // from io.Copy - lineBufferSize = 32 -) - // copyFrom writes bytes starting from offset off in src to dst stopping at the // end of src or at the first error. copyFrom returns the number of bytes // written and any error. diff --git a/gitdiff/io_test.go b/gitdiff/io_test.go index 8d8a18b..bd242a7 100644 --- a/gitdiff/io_test.go +++ b/gitdiff/io_test.go @@ -9,6 +9,8 @@ import ( ) func TestLineReaderAt(t *testing.T) { + const lineTemplate = "generated test line %d\n" + tests := map[string]struct { InputLines int Offset int64 @@ -42,6 +44,11 @@ func TestLineReaderAt(t *testing.T) { Offset: 2, Count: 0, }, + "readAllLines": { + InputLines: 64, + Offset: 0, + Count: 64, + }, "readThroughEOF": { InputLines: 16, Offset: 12, @@ -71,8 +78,6 @@ func TestLineReaderAt(t *testing.T) { }, } - const lineTemplate = "generated test line %d\n" - for name, test := range tests { t.Run(name, func(t *testing.T) { var input bytes.Buffer @@ -114,6 +119,53 @@ func TestLineReaderAt(t *testing.T) { } }) } + + newlineTests := map[string]struct { + InputSize int + }{ + "readLinesNoFinalNewline": { + InputSize: indexBufferSize + indexBufferSize/2, + }, + "readLinesNoFinalNewlineBufferMultiple": { + InputSize: 4 * indexBufferSize, + }, + } + + for name, test := range newlineTests { + t.Run(name, func(t *testing.T) { + input := bytes.Repeat([]byte("0"), test.InputSize) + + var output [][]byte + for i := 0; i < len(input); i++ { + last := i + i += rand.Intn(80) + if i < len(input)-1 { // last character of input must not be a newline + input[i] = '\n' + output = append(output, input[last:i+1]) + } else { + output = append(output, input[last:]) + } + } + + r := &lineReaderAt{r: bytes.NewReader(input)} + lines := make([][]byte, len(output)) + + n, err := r.ReadLinesAt(lines, 0) + if err != nil { + t.Fatalf("unexpected error reading reading lines: %v", err) + } + + if n != len(output) { + t.Fatalf("incorrect number of lines read: expected %d, actual %d", len(output), n) + } + + for i, line := range lines { + if !bytes.Equal(output[i], line) { + t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], line) + } + } + }) + } } func TestCopyFrom(t *testing.T) {