Skip to content

Commit

Permalink
match beginning and end of line correctly in FindNext and `ReplaceR…
Browse files Browse the repository at this point in the history
…egex` (#3575)
  • Loading branch information
matthias314 authored Feb 9, 2025
1 parent bf4156c commit 5a62a8e
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 87 deletions.
17 changes: 11 additions & 6 deletions internal/buffer/loc.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,16 @@ func (l Loc) LessEqual(b Loc) bool {
return l == b
}

// Clamp clamps a loc between start and end
func (l Loc) Clamp(start, end Loc) Loc {
if l.GreaterEqual(end) {
return end
} else if l.LessThan(start) {
return start
}
return l
}

// The following functions require a buffer to know where newlines are

// Diff returns the distance between two locations
Expand Down Expand Up @@ -139,10 +149,5 @@ func ByteOffset(pos Loc, buf *Buffer) int {

// clamps a loc within a buffer
func clamp(pos Loc, la *LineArray) Loc {
if pos.GreaterEqual(la.End()) {
return la.End()
} else if pos.LessThan(la.Start()) {
return la.Start()
}
return pos
return pos.Clamp(la.Start(), la.End())
}
202 changes: 121 additions & 81 deletions internal/buffer/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,56 @@ package buffer

import (
"regexp"
"unicode/utf8"

"github.com/zyedidia/micro/v2/internal/util"
)

// We want "^" and "$" to match only the beginning/end of a line, not the
// beginning/end of the search region if it is in the middle of a line.
// In that case we use padded regexps to require a rune before or after
// the match. (This also affects other empty-string patters like "\\b".)
// The following two flags indicate the padding used.
const (
padStart = 1 << iota
padEnd
)

func findLineParams(b *Buffer, start, end Loc, i int, r *regexp.Regexp) ([]byte, int, int, *regexp.Regexp) {
l := b.LineBytes(i)
charpos := 0
padMode := 0

if i == end.Y {
nchars := util.CharacterCount(l)
end.X = util.Clamp(end.X, 0, nchars)
if end.X < nchars {
l = util.SliceStart(l, end.X+1)
padMode |= padEnd
}
}

if i == start.Y {
nchars := util.CharacterCount(l)
start.X = util.Clamp(start.X, 0, nchars)
if start.X > 0 {
charpos = start.X - 1
l = util.SliceEnd(l, charpos)
padMode |= padStart
}
}

if padMode == padStart {
r = regexp.MustCompile(".(?:" + r.String() + ")")
} else if padMode == padEnd {
r = regexp.MustCompile("(?:" + r.String() + ").")
} else if padMode == padStart|padEnd {
r = regexp.MustCompile(".(?:" + r.String() + ").")
}

return l, charpos, padMode, r
}

func (b *Buffer) findDown(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) {
lastcn := util.CharacterCount(b.LineBytes(b.LinesNum() - 1))
if start.Y > b.LinesNum()-1 {
Expand All @@ -22,30 +68,19 @@ func (b *Buffer) findDown(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) {
}

for i := start.Y; i <= end.Y; i++ {
l := b.LineBytes(i)
charpos := 0

if i == start.Y && start.Y == end.Y {
nchars := util.CharacterCount(l)
start.X = util.Clamp(start.X, 0, nchars)
end.X = util.Clamp(end.X, 0, nchars)
l = util.SliceStart(l, end.X)
l = util.SliceEnd(l, start.X)
charpos = start.X
} else if i == start.Y {
nchars := util.CharacterCount(l)
start.X = util.Clamp(start.X, 0, nchars)
l = util.SliceEnd(l, start.X)
charpos = start.X
} else if i == end.Y {
nchars := util.CharacterCount(l)
end.X = util.Clamp(end.X, 0, nchars)
l = util.SliceStart(l, end.X)
}
l, charpos, padMode, rPadded := findLineParams(b, start, end, i, r)

match := r.FindIndex(l)
match := rPadded.FindIndex(l)

if match != nil {
if padMode&padStart != 0 {
_, size := utf8.DecodeRune(l[match[0]:])
match[0] += size
}
if padMode&padEnd != 0 {
_, size := utf8.DecodeLastRune(l[:match[1]])
match[1] -= size
}
start := Loc{charpos + util.RunePos(l, match[0]), i}
end := Loc{charpos + util.RunePos(l, match[1]), i}
return [2]Loc{start, end}, true
Expand All @@ -70,39 +105,39 @@ func (b *Buffer) findUp(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) {
}

for i := end.Y; i >= start.Y; i-- {
l := b.LineBytes(i)
charpos := 0

if i == start.Y && start.Y == end.Y {
nchars := util.CharacterCount(l)
start.X = util.Clamp(start.X, 0, nchars)
end.X = util.Clamp(end.X, 0, nchars)
l = util.SliceStart(l, end.X)
l = util.SliceEnd(l, start.X)
charpos = start.X
} else if i == start.Y {
nchars := util.CharacterCount(l)
start.X = util.Clamp(start.X, 0, nchars)
l = util.SliceEnd(l, start.X)
charpos = start.X
} else if i == end.Y {
nchars := util.CharacterCount(l)
end.X = util.Clamp(end.X, 0, nchars)
l = util.SliceStart(l, end.X)
}

allMatches := r.FindAllIndex(l, -1)
charCount := util.CharacterCount(b.LineBytes(i))
from := Loc{0, i}.Clamp(start, end)
to := Loc{charCount, i}.Clamp(start, end)

allMatches := b.findAll(r, from, to)
if allMatches != nil {
match := allMatches[len(allMatches)-1]
start := Loc{charpos + util.RunePos(l, match[0]), i}
end := Loc{charpos + util.RunePos(l, match[1]), i}
return [2]Loc{start, end}, true
return [2]Loc{match[0], match[1]}, true
}
}
return [2]Loc{}, false
}

func (b *Buffer) findAll(r *regexp.Regexp, start, end Loc) [][2]Loc {
var matches [][2]Loc
loc := start
for {
match, found := b.findDown(r, loc, end)
if !found {
break
}
matches = append(matches, match)
if match[0] != match[1] {
loc = match[1]
} else if match[1] != end {
loc = match[1].Move(1, b)
} else {
break
}
}
return matches
}

// FindNext finds the next occurrence of a given string in the buffer
// It returns the start and end location of the match (if found) and
// a boolean indicating if it was found
Expand Down Expand Up @@ -146,53 +181,58 @@ func (b *Buffer) FindNext(s string, start, end, from Loc, down bool, useRegex bo
}

// ReplaceRegex replaces all occurrences of 'search' with 'replace' in the given area
// and returns the number of replacements made and the number of runes
// and returns the number of replacements made and the number of characters
// added or removed on the last line of the range
func (b *Buffer) ReplaceRegex(start, end Loc, search *regexp.Regexp, replace []byte, captureGroups bool) (int, int) {
if start.GreaterThan(end) {
start, end = end, start
}

netrunes := 0

charsEnd := util.CharacterCount(b.LineBytes(end.Y))
found := 0
var deltas []Delta

for i := start.Y; i <= end.Y; i++ {
l := b.lines[i].data
charpos := 0

if start.Y == end.Y && i == start.Y {
l = util.SliceStart(l, end.X)
l = util.SliceEnd(l, start.X)
charpos = start.X
} else if i == start.Y {
l = util.SliceEnd(l, start.X)
charpos = start.X
} else if i == end.Y {
l = util.SliceStart(l, end.X)
}
newText := search.ReplaceAllFunc(l, func(in []byte) []byte {
var result []byte
if captureGroups {
for _, submatches := range search.FindAllSubmatchIndex(in, -1) {
result = search.Expand(result, replace, in, submatches)
l := b.LineBytes(i)
charCount := util.CharacterCount(l)
if (i == start.Y && start.X > 0) || (i == end.Y && end.X < charCount) {
// This replacement code works in general, but it creates a separate
// modification for each match. We only use it for the first and last
// lines, which may use padded regexps

from := Loc{0, i}.Clamp(start, end)
to := Loc{charCount, i}.Clamp(start, end)
matches := b.findAll(search, from, to)
found += len(matches)

for j := len(matches) - 1; j >= 0; j-- {
// if we counted upwards, the different deltas would interfere
match := matches[j]
var newText []byte
if captureGroups {
newText = search.ReplaceAll(b.Substr(match[0], match[1]), replace)
} else {
newText = replace
}
} else {
result = replace
deltas = append(deltas, Delta{newText, match[0], match[1]})
}
found++
if i == end.Y {
netrunes += util.CharacterCount(result) - util.CharacterCount(in)
}
return result
})

from := Loc{charpos, i}
to := Loc{charpos + util.CharacterCount(l), i}

deltas = append(deltas, Delta{newText, from, to})
} else {
newLine := search.ReplaceAllFunc(l, func(in []byte) []byte {
found++
var result []byte
if captureGroups {
match := search.FindSubmatchIndex(in)
result = search.Expand(result, replace, in, match)
} else {
result = replace
}
return result
})
deltas = append(deltas, Delta{newLine, Loc{0, i}, Loc{charCount, i}})
}
}

b.MultipleReplace(deltas)

return found, netrunes
return found, util.CharacterCount(b.LineBytes(end.Y)) - charsEnd
}

0 comments on commit 5a62a8e

Please sign in to comment.