From d9a3ae0b27995b629a0be8bc0383b082040f3deb Mon Sep 17 00:00:00 2001 From: Bob Matcuk Date: Sat, 24 Apr 2021 19:08:00 -0400 Subject: [PATCH] complete rewrite for performance and io/fs support --- .travis.yml | 4 +- README.md | 183 +++++++++---- UPGRADING.md | 50 +++- doublestar.go | 623 --------------------------------------------- doublestar_test.go | 328 ++++++++++++++---------- examples/find.go | 20 +- examples/go.mod | 9 + glob.go | 391 ++++++++++++++++++++++++++++ globwalk.go | 273 ++++++++++++++++++++ go.mod | 4 +- match.go | 369 +++++++++++++++++++++++++++ utils.go | 69 +++++ validate.go | 61 +++++ 13 files changed, 1566 insertions(+), 818 deletions(-) create mode 100644 examples/go.mod create mode 100644 glob.go create mode 100644 globwalk.go create mode 100644 match.go create mode 100644 utils.go create mode 100644 validate.go diff --git a/.travis.yml b/.travis.yml index 51cf057..334e0bf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,7 @@ language: go go: - - 1.13 - - 1.14 - - 1.15 + - 1.16 os: - linux diff --git a/README.md b/README.md index ffcc3de..67b409d 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,17 @@ Path pattern matching and globbing supporting `doublestar` (`**`) patterns. -[![PkgGoDev](https://pkg.go.dev/badge/github.com/bmatcuk/doublestar)](https://pkg.go.dev/github.com/bmatcuk/doublestar/v2) +[![PkgGoDev](https://pkg.go.dev/badge/github.com/bmatcuk/doublestar)](https://pkg.go.dev/github.com/bmatcuk/doublestar/v4) [![Release](https://img.shields.io/github/release/bmatcuk/doublestar.svg?branch=master)](https://github.com/bmatcuk/doublestar/releases) [![Build Status](https://travis-ci.com/bmatcuk/doublestar.svg?branch=master)](https://travis-ci.com/bmatcuk/doublestar) [![codecov.io](https://img.shields.io/codecov/c/github/bmatcuk/doublestar.svg?branch=master)](https://codecov.io/github/bmatcuk/doublestar?branch=master) ## About -#### [Upgrading to v2? 
To v3?](UPGRADING.md) +#### [Upgrading?](UPGRADING.md) -**doublestar** is a [golang](http://golang.org/) implementation of path pattern -matching and globbing with support for "doublestar" (aka globstar: `**`) -patterns. +**doublestar** is a [golang] implementation of path pattern matching and +globbing with support for "doublestar" (aka globstar: `**`) patterns. doublestar patterns match files and directories recursively. For example, if you had the following directory structure: @@ -36,18 +35,22 @@ such as `/path**` is invalid and will be treated the same as `/path*`, but match all directories and files under the path directory, but `/path/**/` will only match directories. +v4 is a complete rewrite with a focus on performance. Additionally, +[doublestar] has been updated to use the new [io/fs] package for filesystem +access. As a result, it is only supported by [golang] v1.16+. + ## Installation **doublestar** can be installed via `go get`: ```bash -go get github.com/bmatcuk/doublestar/v2 +go get github.com/bmatcuk/doublestar/v4 ``` To use it in your code, you must import it: ```go -import "github.com/bmatcuk/doublestar/v2" +import "github.com/bmatcuk/doublestar/v4" ``` ## Usage @@ -58,14 +61,18 @@ import "github.com/bmatcuk/doublestar/v2" func Match(pattern, name string) (bool, error) ``` -Match returns true if `name` matches the file name `pattern` -([see below](#patterns)). `name` and `pattern` are split on forward slash (`/`) -characters and may be relative or absolute. +Match returns true if `name` matches the file name `pattern` ([see +"patterns"]). `name` and `pattern` are split on forward slash (`/`) characters +and may be relative or absolute. + +Match requires pattern to match all of name, not just a substring. The only +possible returned error is ErrBadPattern, when pattern is malformed. -Note: `Match()` is meant to be a drop-in replacement for `path.Match()`. As -such, it always uses `/` as the path separator. 
If you are writing code that -will run on systems where `/` is not the path separator (such as Windows), you -want to use `PathMatch()` (below) instead. +Note: this is meant as a drop-in replacement for `path.Match()` which always +uses `'/'` as the path separator. If you want to support systems which use a +different path separator (such as Windows), what you want is `PathMatch()`. +Alternatively, you can run `filepath.ToSlash()` on both pattern and name and +then use this function. ### PathMatch @@ -74,24 +81,97 @@ want to use `PathMatch()` (below) instead. func PathMatch(pattern, name string) (bool, error) ``` -PathMatch returns true if `name` matches the file name `pattern` -([see below](#patterns)). The difference between Match and PathMatch is that -PathMatch will automatically use your system's path separator to split `name` -and `pattern`. +PathMatch returns true if `name` matches the file name `pattern` ([see +"patterns"]). The difference between Match and PathMatch is that PathMatch will +automatically use your system's path separator to split `name` and `pattern`. +On systems where the path separator is `'\'`, escaping will be disabled. -`PathMatch()` is meant to be a drop-in replacement for `filepath.Match()`. +Note: this is meant as a drop-in replacement for `filepath.Match()`. It assumes +that both `pattern` and `name` are using the system's path separator. If you +can't be sure of that, use `filepath.ToSlash()` on both `pattern` and `name`, +and then use the `Match()` function instead. ### Glob ```go -func Glob(pattern string) ([]string, error) +func Glob(fsys fs.FS, pattern string) ([]string, error) +``` + +Glob returns the names of all files matching pattern or nil if there is no +matching file. The syntax of patterns is the same as in `Match()`. The pattern +may describe hierarchical names such as `usr/*/bin/ed`. + +Glob ignores file system errors such as I/O errors reading directories. 
The +only possible returned error is ErrBadPattern, reporting that the pattern is +malformed. + +Note: this is meant as a drop-in replacement for `io/fs.Glob()`. Like +`io/fs.Glob()`, this function assumes that your pattern uses `/` as the path +separator even if that's not correct for your OS (like Windows). If you aren't +sure if that's the case, you can use `filepath.ToSlash()` on your pattern +before calling `Glob()`. + +### GlobWalk + +```go +type GlobWalkFunc func(path string, d fs.DirEntry) error + +func GlobWalk(fsys fs.FS, pattern string, fn GlobWalkFunc) error +``` + +GlobWalk calls the callback function `fn` for every file matching pattern. The +syntax of pattern is the same as in Match(). The pattern may describe +hierarchical names such as usr/*/bin/ed. + +GlobWalk may have a small performance benefit over Glob if you do not need a +slice of matches because it can avoid allocating memory for the matches. +Additionally, GlobWalk gives you access to the `fs.DirEntry` objects for each +match, and lets you quit early by returning a non-nil error from your callback +function. + +GlobWalk ignores file system errors such as I/O errors reading directories. +GlobWalk may return ErrBadPattern, reporting that the pattern is malformed. +Additionally, if the callback function `fn` returns an error, GlobWalk will +exit immediately and return that error. + +Like Glob(), this function assumes that your pattern uses `/` as the path +separator even if that's not correct for your OS (like Windows). If you aren't +sure if that's the case, you can use filepath.ToSlash() on your pattern before +calling GlobWalk(). + +### SplitPattern + +```go +func SplitPattern(p string) (base, pattern string) +``` + +SplitPattern is a utility function. Given a pattern, SplitPattern will return +two strings: the first string is everything up to the last slash (`/`) that +appears _before_ any unescaped "meta" characters (ie, `*?[{`). The second +string is everything after that slash. 
For example, given the pattern: + +``` +../../path/to/meta*/** + ^----------- split here +``` + +SplitPattern returns "../../path/to" and "meta*/**". This is useful for +initializing os.DirFS() to call Glob() because Glob() will silently fail if +your pattern includes `/./` or `/../`. For example: + +```go +base, pattern := SplitPattern("../../path/to/meta*/**") +fsys := os.DirFS(base) +matches, err := Glob(fsys, pattern) ``` -Glob finds all files and directories in the filesystem that match `pattern` -([see below](#patterns)). `pattern` may be relative (to the current working -directory), or absolute. +If SplitPattern cannot find somewhere to split the pattern (for example, +`meta*/**`), it will return "." and the unaltered pattern (`meta*/**` in this +example). -`Glob()` is meant to be a drop-in replacement for `filepath.Glob()`. +Of course, it is your responsibility to decide if the returned base path is +"safe" in the context of your application. Perhaps you could use Match() to +validate against a list of approved base directories? ### Patterns @@ -100,13 +180,14 @@ directory), or absolute. Special Terms | Meaning ------------- | ------- `*` | matches any sequence of non-path-separators -`**` | matches any sequence of characters, including path separators +`/**/` | matches zero or more directories `?` | matches any single non-path-separator character -`[class]` | matches any single non-path-separator character against a class of characters ([see below](#character-classes)) +`[class]` | matches any single non-path-separator character against a class of characters ([see "character classes"]) `{alt1,...}` | matches a sequence of characters if one of the comma-separated alternatives matches Any character with a special meaning can be escaped with a backslash (`\`). +A doublestar (`**`) should appear surrounded by path separators such as `/**/`. 
A mid-pattern doublestar (`**`) behaves like bash's globstar option: a pattern such as `path/to/**.txt` would return the same results as `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`. @@ -120,28 +201,44 @@ Class | Meaning `[abc]` | matches any single character within the set `[a-z]` | matches any single character in the range `[^class]` | matches any single character which does *not* match the class +`[!class]` | same as `^`: negates the class -### Abstracting the `os` package +## Performance -**doublestar** by default uses the `Open`, `Stat`, and `Lstat`, functions and -`PathSeparator` value from the standard library's `os` package. To abstract -this, for example to be able to perform tests of Windows paths on Linux, or to -interoperate with your own filesystem code, it includes the functions `GlobOS` -and `PathMatchOS` which are identical to `Glob` and `PathMatch` except that they -operate on an `OS` interface: - -```go -type OS interface { - Lstat(name string) (os.FileInfo, error) - Open(name string) (*os.File, error) - PathSeparator() rune - Stat(name string) (os.FileInfo, error) -} ``` +goos: darwin +goarch: amd64 +pkg: github.com/bmatcuk/doublestar/v4 +cpu: Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz +BenchmarkMatch-8 285639 3868 ns/op 0 B/op 0 allocs/op +BenchmarkGoMatch-8 286945 3726 ns/op 0 B/op 0 allocs/op +BenchmarkPathMatch-8 320511 3493 ns/op 0 B/op 0 allocs/op +BenchmarkGoPathMatch-8 304236 3434 ns/op 0 B/op 0 allocs/op +BenchmarkGlob-8 466 2501123 ns/op 190225 B/op 2849 allocs/op +BenchmarkGlobWalk-8 476 2536293 ns/op 184017 B/op 2750 allocs/op +BenchmarkGoGlob-8 463 2574836 ns/op 194249 B/op 2929 allocs/op +``` + +These benchmarks (in `doublestar_test.go`) compare Match() to path.Match(), +PathMatch() to filepath.Match(), and Glob() + GlobWalk() to io/fs.Glob(). They +only run patterns that the standard go packages can understand as well (so, no +`{alts}` or `**`) for a fair comparison. 
Of course, alts and doublestars will +be less performant than the other pattern meta characters. -`StandardOS` is a value that implements this interface by calling functions in -the standard library's `os` package. +Alts are essentially like running multiple patterns, the number of which can +get large if your pattern has alts nested inside alts. This affects both +matching (ie, Match()) and globbing (Glob()). + +`**` performance in matching is actually pretty similar to a regular `*`, but +can cause a large number of reads when globbing as it will need to recursively +traverse your filesystem. ## License [MIT License](LICENSE) + +[doublestar]: https://github.com/bmatcuk/doublestar +[golang]: http://golang.org/ +[io/fs]: https://golang.org/pkg/io/fs/ +[see "character classes"]: #character-classes +[see "patterns"]: #patterns diff --git a/UPGRADING.md b/UPGRADING.md index 9a3b82d..25aace3 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -1,3 +1,40 @@ +# Upgrading from v3 to v4 + +v4 is a complete rewrite with a focus on performance. Additionally, +[doublestar] has been updated to use the new [io/fs] package for filesystem +access. As a result, it is only supported by [golang] v1.16+. + +`Match()` and `PathMatch()` mostly did not change, besides big performance +improvements. Their API is the same. However, note the following corner cases: + +* In previous versions of [doublestar], `PathMatch()` could accept patterns + that used either platform-specific path separators, or `/`. This was + undocumented and didn't match `filepath.Match()`. In v4, both `pattern` and + `name` must be using appropriate path separators for the platform. You can + use `filepath.FromSlash()` to change `/` to platform-specific separators if + you aren't sure. +* In previous versions of [doublestar], a pattern such as `path/to/a/**` would + _not_ match `path/to/a`. In v4, this pattern _will_ match because if `a` was + a directory, `Glob()` would return it. 
In other words, the following returns + true: `Match("path/to/a/**", "path/to/a")` + +`Glob()` changed from using a [doublestar]-specific filesystem abstraction (the +`OS` interface) to the [io/fs] package. As a result, it now takes a `fs.FS` as +its first argument. This change has a couple ramifications: + +* Like `io/fs.Glob`, `pattern` must use a `/` as path separator, even on + platforms that use something else. You can use `filepath.ToSlash()` on your + patterns if you aren't sure. +* Patterns that contain `/./` or `/../` are invalid. The [io/fs] package + rejects them, returning an IO error. Since `Glob()` ignores IO errors, it'll + end up being silently rejected. You can run `path.Clean()` to ensure they are + removed from the pattern. + +v4 also added a `GlobWalk()` function that is slightly more performant than +`Glob()` if you just need to iterate over the results and don't need a string +slice. You also get `fs.DirEntry` objects for each result, and can quit early +if your callback returns an error. + # Upgrading from v2 to v3 v3 introduced using `!` to negate character classes, in addition to `^`. If any @@ -12,10 +49,15 @@ The change from v1 to v2 was fairly minor: the return type of the `Open` method on the `OS` interface was changed from `*os.File` to `File`, a new interface exported by doublestar. The new `File` interface only defines the functionality doublestar actually needs (`io.Closer` and `Readdir`), making it easier to use -doublestar with [go-billy](https://github.com/src-d/go-billy), -[afero](https://github.com/spf13/afero), or something similar. If you were -using this functionality, updating should be as easy as updating `Open's` -return type, since `os.File` already implements `doublestar.File`. +doublestar with [go-billy], [afero], or something similar. If you were using +this functionality, updating should be as easy as updating `Open's` return +type, since `os.File` already implements `doublestar.File`. 
If you weren't using this functionality, updating should be as easy as changing your dependencies to point to v2. + +[afero]: https://github.com/spf13/afero +[doublestar]: https://github.com/bmatcuk/doublestar +[go-billy]: https://github.com/src-d/go-billy +[golang]: http://golang.org/ +[io/fs]: https://golang.org/pkg/io/fs/ diff --git a/doublestar.go b/doublestar.go index 36919e5..269f6f0 100644 --- a/doublestar.go +++ b/doublestar.go @@ -1,631 +1,8 @@ package doublestar import ( - "fmt" - "io" - "os" "path" - "path/filepath" - "sort" - "strings" - "unicode/utf8" ) -// File defines a subset of file operations -type File interface { - io.Closer - Readdir(count int) ([]os.FileInfo, error) -} - -// An OS abstracts functions in the standard library's os package. -type OS interface { - Lstat(name string) (os.FileInfo, error) - Open(name string) (File, error) - PathSeparator() rune - Stat(name string) (os.FileInfo, error) -} - -// A standardOS implements OS by calling functions in the standard library's os -// package. -type standardOS struct{} - -func (standardOS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) } -func (standardOS) Open(name string) (File, error) { return os.Open(name) } -func (standardOS) PathSeparator() rune { return os.PathSeparator } -func (standardOS) Stat(name string) (os.FileInfo, error) { return os.Stat(name) } - -// StandardOS is a value that implements the OS interface by calling functions -// in the standard libray's os package. -var StandardOS OS = standardOS{} - // ErrBadPattern indicates a pattern was malformed. var ErrBadPattern = path.ErrBadPattern - -// Find the first index of a rune in a string, -// ignoring any times the rune is escaped using "\". 
-func indexRuneWithEscaping(s string, r rune) int { - end := strings.IndexRune(s, r) - if end == -1 || r == '\\' { - return end - } - if end > 0 && s[end-1] == '\\' { - start := end + utf8.RuneLen(r) - end = indexRuneWithEscaping(s[start:], r) - if end != -1 { - end += start - } - } - return end -} - -// Find the last index of a rune in a string, -// ignoring any times the rune is escaped using "\". -func lastIndexRuneWithEscaping(s string, r rune) int { - end := strings.LastIndex(s, string(r)) - if end == -1 { - return -1 - } - if end > 0 && s[end-1] == '\\' { - end = lastIndexRuneWithEscaping(s[:end-1], r) - } - return end -} - -// Find the index of the first instance of one of the unicode characters in -// chars, ignoring any times those characters are escaped using "\". -func indexAnyWithEscaping(s, chars string) int { - end := strings.IndexAny(s, chars) - if end == -1 { - return -1 - } - if end > 0 && s[end-1] == '\\' { - _, adj := utf8.DecodeRuneInString(s[end:]) - start := end + adj - end = indexAnyWithEscaping(s[start:], chars) - if end != -1 { - end += start - } - } - return end -} - -// Split a set of alternatives such as {alt1,alt2,...} and returns the index of -// the rune after the closing curly brace. Respects nested alternatives and -// escaped runes. -func splitAlternatives(s string) (ret []string, idx int) { - ret = make([]string, 0, 2) - idx = 0 - slen := len(s) - braceCnt := 1 - esc := false - start := 0 - for braceCnt > 0 { - if idx >= slen { - return nil, -1 - } - - sRune, adj := utf8.DecodeRuneInString(s[idx:]) - if esc { - esc = false - } else if sRune == '\\' { - esc = true - } else if sRune == '{' { - braceCnt++ - } else if sRune == '}' { - braceCnt-- - } else if sRune == ',' && braceCnt == 1 { - ret = append(ret, s[start:idx]) - start = idx + adj - } - - idx += adj - } - ret = append(ret, s[start:idx-1]) - return -} - -// Returns true if the pattern is "zero length", meaning -// it could match zero or more characters. 
-func isZeroLengthPattern(pattern string) (ret bool, err error) { - // * can match zero - if pattern == "" || pattern == "*" || pattern == "**" { - return true, nil - } - - // an alternative with zero length can match zero, for example {,x} - the - // first alternative has zero length - r, adj := utf8.DecodeRuneInString(pattern) - if r == '{' { - options, endOptions := splitAlternatives(pattern[adj:]) - if endOptions == -1 { - return false, ErrBadPattern - } - if ret, err = isZeroLengthPattern(pattern[adj+endOptions:]); !ret || err != nil { - return - } - for _, o := range options { - if ret, err = isZeroLengthPattern(o); ret || err != nil { - return - } - } - } - - return false, nil -} - -// Match returns true if name matches the shell file name pattern. -// The pattern syntax is: -// -// pattern: -// { term } -// term: -// '*' matches any sequence of non-path-separators -// '**' matches any sequence of characters, including -// path separators. -// '?' matches any single non-path-separator character -// '[' [ '^' '!' ] { character-range } ']' -// character class (must be non-empty) -// '{' { term } [ ',' { term } ... ] '}' -// c matches character c (c != '*', '?', '\\', '[') -// '\\' c matches character c -// -// character-range: -// c matches character c (c != '\\', '-', ']') -// '\\' c matches character c -// lo '-' hi matches character c for lo <= c <= hi -// -// Match requires pattern to match all of name, not just a substring. -// The path-separator defaults to the '/' character. The only possible -// returned error is ErrBadPattern, when pattern is malformed. -// -// Note: this is meant as a drop-in replacement for path.Match() which -// always uses '/' as the path separator. If you want to support systems -// which use a different path separator (such as Windows), what you want -// is the PathMatch() function below. 
-// -func Match(pattern, name string) (bool, error) { - return doMatching(pattern, name, '/') -} - -// PathMatch is like Match except that it uses your system's path separator. -// For most systems, this will be '/'. However, for Windows, it would be '\\'. -// Note that for systems where the path separator is '\\', escaping is -// disabled. -// -// Note: this is meant as a drop-in replacement for filepath.Match(). -// -func PathMatch(pattern, name string) (bool, error) { - return PathMatchOS(StandardOS, pattern, name) -} - -// PathMatchOS is like PathMatch except that it uses vos's path separator. -func PathMatchOS(vos OS, pattern, name string) (bool, error) { - pattern = filepath.ToSlash(pattern) - return doMatching(pattern, name, vos.PathSeparator()) -} - -func doMatching(pattern, name string, separator rune) (matched bool, err error) { - // check for some base-cases - patternLen, nameLen := len(pattern), len(name) - if patternLen == 0 { - return nameLen == 0, nil - } else if nameLen == 0 { - return isZeroLengthPattern(pattern) - } - - separatorAdj := utf8.RuneLen(separator) - - patIdx := indexRuneWithEscaping(pattern, '/') - lastPat := patIdx == -1 - if lastPat { - patIdx = len(pattern) - } - if pattern[:patIdx] == "**" { - // if our last pattern component is a doublestar, we're done - - // doublestar will match any remaining name components, if any. 
- if lastPat { - return true, nil - } - - // otherwise, try matching remaining components - nameIdx := 0 - patIdx += 1 - for { - if m, _ := doMatching(pattern[patIdx:], name[nameIdx:], separator); m { - return true, nil - } - - nextNameIdx := 0 - if nextNameIdx = indexRuneWithEscaping(name[nameIdx:], separator); nextNameIdx == -1 { - break - } - nameIdx += separatorAdj + nextNameIdx - } - return false, nil - } - - nameIdx := indexRuneWithEscaping(name, separator) - lastName := nameIdx == -1 - if lastName { - nameIdx = nameLen - } - - var matches []string - matches, err = matchComponent(pattern, name[:nameIdx]) - if matches == nil || err != nil { - return - } - if len(matches) == 0 && lastName { - return true, nil - } - - if !lastName { - nameIdx += separatorAdj - for _, alt := range matches { - matched, err = doMatching(alt, name[nameIdx:], separator) - if matched || err != nil { - return - } - } - } - - return false, nil -} - -// Glob returns the names of all files matching pattern or nil -// if there is no matching file. The syntax of pattern is the same -// as in Match. The pattern may describe hierarchical names such as -// /usr/*/bin/ed (assuming the Separator is '/'). -// -// Glob ignores file system errors such as I/O errors reading directories. -// The only possible returned error is ErrBadPattern, when pattern -// is malformed. -// -// Your system path separator is automatically used. This means on -// systems where the separator is '\\' (Windows), escaping will be -// disabled. -// -// Note: this is meant as a drop-in replacement for filepath.Glob(). -// -func Glob(pattern string) (matches []string, err error) { - return GlobOS(StandardOS, pattern) -} - -// GlobOS is like Glob except that it operates on vos. 
-func GlobOS(vos OS, pattern string) (matches []string, err error) { - if len(pattern) == 0 { - return nil, nil - } - - // if the pattern starts with alternatives, we need to handle that here - the - // alternatives may be a mix of relative and absolute - if pattern[0] == '{' { - options, endOptions := splitAlternatives(pattern[1:]) - if endOptions == -1 { - return nil, ErrBadPattern - } - for _, o := range options { - m, e := GlobOS(vos, o+pattern[endOptions+1:]) - if e != nil { - return nil, e - } - matches = append(matches, m...) - } - return matches, nil - } - - // If the pattern is relative or absolute and we're on a non-Windows machine, - // volumeName will be an empty string. If it is absolute and we're on a - // Windows machine, volumeName will be a drive letter ("C:") for filesystem - // paths or \\\ for UNC paths. - isAbs := filepath.IsAbs(pattern) || pattern[0] == '\\' || pattern[0] == '/' - volumeName := filepath.VolumeName(pattern) - isWindowsUNC := strings.HasPrefix(volumeName, `\\`) - if isWindowsUNC || isAbs { - startIdx := len(volumeName) + 1 - return doGlob(vos, fmt.Sprintf("%s%s", volumeName, string(vos.PathSeparator())), filepath.ToSlash(pattern[startIdx:]), matches) - } - - // otherwise, it's a relative pattern - return doGlob(vos, ".", filepath.ToSlash(pattern), matches) -} - -// Perform a glob -func doGlob(vos OS, basedir, pattern string, matches []string) (m []string, e error) { - m = matches - e = nil - - // if the pattern starts with any path components that aren't globbed (ie, - // `path/to/glob*`), we can skip over the un-globbed components (`path/to` in - // our example). 
- globIdx := indexAnyWithEscaping(pattern, "*?[{\\") - if globIdx > 0 { - globIdx = lastIndexRuneWithEscaping(pattern[:globIdx], '/') - } else if globIdx == -1 { - globIdx = lastIndexRuneWithEscaping(pattern, '/') - } - if globIdx > 0 { - basedir = filepath.Join(basedir, pattern[:globIdx]) - pattern = pattern[globIdx+1:] - } - - // Lstat will return an error if the file/directory doesn't exist - fi, err := vos.Lstat(basedir) - if err != nil { - return - } - - // if the pattern is empty, we've found a match - if len(pattern) == 0 { - m = append(m, basedir) - return - } - - // otherwise, we need to check each item in the directory... - - // first, if basedir is a symlink, follow it... - if (fi.Mode() & os.ModeSymlink) != 0 { - fi, err = vos.Stat(basedir) - if err != nil { - return - } - } - - // confirm it's a directory... - if !fi.IsDir() { - return - } - - files, err := filesInDir(vos, basedir) - if err != nil { - return - } - - sort.Slice(files, func(i, j int) bool { return files[i].Name() < files[j].Name() }) - - slashIdx := indexRuneWithEscaping(pattern, '/') - lastComponent := slashIdx == -1 - if lastComponent { - slashIdx = len(pattern) - } - if pattern[:slashIdx] == "**" { - // if the current component is a doublestar, we'll try depth-first - for _, file := range files { - // if symlink, we may want to follow - if (file.Mode() & os.ModeSymlink) != 0 { - file, err = vos.Stat(filepath.Join(basedir, file.Name())) - if err != nil { - continue - } - } - - if file.IsDir() { - // recurse into directories - if lastComponent { - m = append(m, filepath.Join(basedir, file.Name())) - } - m, e = doGlob(vos, filepath.Join(basedir, file.Name()), pattern, m) - } else if lastComponent { - // if the pattern's last component is a doublestar, we match filenames, too - m = append(m, filepath.Join(basedir, file.Name())) - } - } - if lastComponent { - return // we're done - } - - pattern = pattern[slashIdx+1:] - } - - // check items in current directory and recurse - var match 
[]string - for _, file := range files { - match, e = matchComponent(pattern, file.Name()) - if e != nil { - return - } - if match != nil { - if len(match) == 0 { - m = append(m, filepath.Join(basedir, file.Name())) - } else { - for _, alt := range match { - m, e = doGlob(vos, filepath.Join(basedir, file.Name()), alt, m) - } - } - } - } - return -} - -func filesInDir(vos OS, dirPath string) (files []os.FileInfo, e error) { - dir, err := vos.Open(dirPath) - if err != nil { - return nil, nil - } - defer func() { - if err := dir.Close(); e == nil { - e = err - } - }() - - files, err = dir.Readdir(-1) - if err != nil { - return nil, nil - } - - return -} - -// Attempt to match a single path component with a pattern. Note that the -// pattern may include multiple components but that the "name" is just a single -// path component. The return value is a slice of patterns that should be -// checked against subsequent path components or nil, indicating that the -// pattern does not match this path. 
It is assumed that pattern components are -// separated by '/' -func matchComponent(pattern, name string) ([]string, error) { - // check for matches one rune at a time - patternLen, nameLen := len(pattern), len(name) - patIdx, nameIdx := 0, 0 - for patIdx < patternLen && nameIdx < nameLen { - patRune, patAdj := utf8.DecodeRuneInString(pattern[patIdx:]) - nameRune, nameAdj := utf8.DecodeRuneInString(name[nameIdx:]) - if patRune == '/' { - patIdx++ - break - } else if patRune == '\\' { - // handle escaped runes, only if separator isn't '\\' - patIdx += patAdj - patRune, patAdj = utf8.DecodeRuneInString(pattern[patIdx:]) - if patRune == utf8.RuneError { - return nil, ErrBadPattern - } else if patRune == nameRune { - patIdx += patAdj - nameIdx += nameAdj - } else { - return nil, nil - } - } else if patRune == '*' { - // handle stars - a star at the end of the pattern or before a separator - // will always match the rest of the path component - if patIdx += patAdj; patIdx >= patternLen { - return []string{}, nil - } - if patRune, patAdj = utf8.DecodeRuneInString(pattern[patIdx:]); patRune == '/' { - return []string{pattern[patIdx+patAdj:]}, nil - } - - // check if we can make any matches - for ; nameIdx < nameLen; nameIdx += nameAdj { - if m, e := matchComponent(pattern[patIdx:], name[nameIdx:]); m != nil || e != nil { - return m, e - } - _, nameAdj = utf8.DecodeRuneInString(name[nameIdx:]) - } - return nil, nil - } else if patRune == '[' { - // handle character sets - patIdx += patAdj - endClass := indexRuneWithEscaping(pattern[patIdx:], ']') - if endClass == -1 { - return nil, ErrBadPattern - } - endClass += patIdx - classRunes := []rune(pattern[patIdx:endClass]) - classRunesLen := len(classRunes) - if classRunesLen > 0 { - classIdx := 0 - matchClass := false - negate := classRunes[0] == '^' || classRunes[0] == '!' 
- if negate { - classIdx++ - } - for classIdx < classRunesLen { - low := classRunes[classIdx] - if low == '-' { - return nil, ErrBadPattern - } - classIdx++ - if low == '\\' { - if classIdx < classRunesLen { - low = classRunes[classIdx] - classIdx++ - } else { - return nil, ErrBadPattern - } - } - high := low - if classIdx < classRunesLen && classRunes[classIdx] == '-' { - // we have a range of runes - if classIdx++; classIdx >= classRunesLen { - return nil, ErrBadPattern - } - high = classRunes[classIdx] - if high == '-' { - return nil, ErrBadPattern - } - classIdx++ - if high == '\\' { - if classIdx < classRunesLen { - high = classRunes[classIdx] - classIdx++ - } else { - return nil, ErrBadPattern - } - } - } - if low <= nameRune && nameRune <= high { - matchClass = true - } - } - if matchClass == negate { - return nil, nil - } - } else { - return nil, ErrBadPattern - } - patIdx = endClass + 1 - nameIdx += nameAdj - } else if patRune == '{' { - // handle alternatives such as {alt1,alt2,...} - patIdx += patAdj - options, endOptions := splitAlternatives(pattern[patIdx:]) - if endOptions == -1 { - return nil, ErrBadPattern - } - patIdx += endOptions - - results := make([][]string, 0, len(options)) - totalResults := 0 - for _, o := range options { - m, e := matchComponent(o+pattern[patIdx:], name[nameIdx:]) - if e != nil { - return nil, e - } - if m != nil { - results = append(results, m) - totalResults += len(m) - } - } - if len(results) > 0 { - lst := make([]string, 0, totalResults) - for _, m := range results { - lst = append(lst, m...) - } - return lst, nil - } - - return nil, nil - } else if patRune == '?' 
|| patRune == nameRune { - // handle single-rune wildcard - patIdx += patAdj - nameIdx += nameAdj - } else { - return nil, nil - } - } - if nameIdx >= nameLen { - if patIdx >= patternLen { - return []string{}, nil - } - - pattern = pattern[patIdx:] - slashIdx := indexRuneWithEscaping(pattern, '/') - testPattern := pattern - if slashIdx >= 0 { - testPattern = pattern[:slashIdx] - } - - zeroLength, err := isZeroLengthPattern(testPattern) - if err != nil { - return nil, err - } - if zeroLength { - if slashIdx == -1 { - return []string{}, nil - } else { - return []string{pattern[slashIdx+1:]}, nil - } - } - } - return nil, nil -} diff --git a/doublestar_test.go b/doublestar_test.go index e72d8c0..6dfb296 100644 --- a/doublestar_test.go +++ b/doublestar_test.go @@ -3,12 +3,12 @@ package doublestar import ( + "io/fs" "log" "os" "path" "path/filepath" "runtime" - "strings" "testing" ) @@ -18,117 +18,145 @@ type MatchTest struct { expectedErr error // an expected error isStandard bool // pattern doesn't use any doublestar features testOnDisk bool // true: test pattern against files in "test" directory + numResults int // number of glob results if testing on disk } // Tests which contain escapes and symlinks will not work on Windows var onWindows = runtime.GOOS == "windows" var matchTests = []MatchTest{ - {"*", "", true, nil, true, false}, - {"*", "/", false, nil, true, false}, - {"/*", "/", true, nil, true, false}, - {"/*", "/debug/", false, nil, true, false}, - {"/*", "//", false, nil, true, false}, - {"abc", "abc", true, nil, true, true}, - {"*", "abc", true, nil, true, true}, - {"*c", "abc", true, nil, true, true}, - {"*/", "a/", true, nil, true, false}, - {"a*", "a", true, nil, true, true}, - {"a*", "abc", true, nil, true, true}, - {"a*", "ab/c", false, nil, true, true}, - {"a*/b", "abc/b", true, nil, true, true}, - {"a*/b", "a/c/b", false, nil, true, true}, - {"a*b*c*d*e*", "axbxcxdxe", true, nil, true, true}, - {"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil, true, true}, 
- {"a*b*c*d*e*/f", "axbxcxdxexxx/f", true, nil, true, true}, - {"a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false, nil, true, true}, - {"a*b*c*d*e*/f", "axbxcxdxexxx/fff", false, nil, true, true}, - {"a*b?c*x", "abxbbxdbxebxczzx", true, nil, true, true}, - {"a*b?c*x", "abxbbxdbxebxczzy", false, nil, true, true}, - {"ab[c]", "abc", true, nil, true, true}, - {"ab[b-d]", "abc", true, nil, true, true}, - {"ab[e-g]", "abc", false, nil, true, true}, - {"ab[^c]", "abc", false, nil, true, true}, - {"ab[^b-d]", "abc", false, nil, true, true}, - {"ab[^e-g]", "abc", true, nil, true, true}, - {"a\\*b", "ab", false, nil, true, true}, - {"a?b", "a☺b", true, nil, true, true}, - {"a[^a]b", "a☺b", true, nil, true, true}, - {"a[!a]b", "a☺b", true, nil, false, true}, - {"a???b", "a☺b", false, nil, true, true}, - {"a[^a][^a][^a]b", "a☺b", false, nil, true, true}, - {"[a-ζ]*", "α", true, nil, true, true}, - {"*[a-ζ]", "A", false, nil, true, true}, - {"a?b", "a/b", false, nil, true, true}, - {"a*b", "a/b", false, nil, true, true}, - {"[\\]a]", "]", true, nil, true, !onWindows}, - {"[\\-]", "-", true, nil, true, !onWindows}, - {"[x\\-]", "x", true, nil, true, !onWindows}, - {"[x\\-]", "-", true, nil, true, !onWindows}, - {"[x\\-]", "z", false, nil, true, !onWindows}, - {"[\\-x]", "x", true, nil, true, !onWindows}, - {"[\\-x]", "-", true, nil, true, !onWindows}, - {"[\\-x]", "a", false, nil, true, !onWindows}, - {"[]a]", "]", false, ErrBadPattern, true, true}, - {"[-]", "-", false, ErrBadPattern, true, true}, - {"[x-]", "x", false, ErrBadPattern, true, true}, - {"[x-]", "-", false, ErrBadPattern, true, true}, - {"[x-]", "z", false, ErrBadPattern, true, true}, - {"[-x]", "x", false, ErrBadPattern, true, true}, - {"[-x]", "-", false, ErrBadPattern, true, true}, - {"[-x]", "a", false, ErrBadPattern, true, true}, - {"\\", "a", false, ErrBadPattern, true, !onWindows}, - {"[a-b-c]", "a", false, ErrBadPattern, true, true}, - {"[", "a", false, ErrBadPattern, true, true}, - {"[^", "a", false, 
ErrBadPattern, true, true}, - {"[^bc", "a", false, ErrBadPattern, true, true}, - {"a[", "a", false, nil, true, false}, - {"a[", "ab", false, ErrBadPattern, true, true}, - {"*x", "xxx", true, nil, true, true}, - {"[abc]", "b", true, nil, true, true}, - {"**", "", true, nil, false, false}, - {"a/**", "a", false, nil, false, true}, - {"a/**", "a/b", true, nil, false, true}, - {"a/**", "a/b/c", true, nil, false, true}, - {"**/c", "c", true, nil, false, true}, - {"**/c", "b/c", true, nil, false, true}, - {"**/c", "a/b/c", true, nil, false, true}, - {"**/c", "a/b", false, nil, false, true}, - {"**/c", "abcd", false, nil, false, true}, - {"**/c", "a/abc", false, nil, false, true}, - {"a/**/b", "a/b", true, nil, false, true}, - {"a/**/c", "a/b/c", true, nil, false, true}, - {"a/**/d", "a/b/c/d", true, nil, false, true}, - {"a/\\**", "a/b/c", false, nil, false, !onWindows}, + {"*", "", true, nil, true, false, 0}, + {"*", "/", false, nil, true, false, 0}, + {"/*", "/", true, nil, true, false, 0}, + {"/*", "/debug/", false, nil, true, false, 0}, + {"/*", "//", false, nil, true, false, 0}, + {"abc", "abc", true, nil, true, true, 1}, + {"*", "abc", true, nil, true, true, 19}, + {"*c", "abc", true, nil, true, true, 2}, + {"*/", "a/", true, nil, true, false, 0}, + {"a*", "a", true, nil, true, true, 9}, + {"a*", "abc", true, nil, true, true, 9}, + {"a*", "ab/c", false, nil, true, true, 9}, + {"a*/b", "abc/b", true, nil, true, true, 2}, + {"a*/b", "a/c/b", false, nil, true, true, 2}, + {"a*b*c*d*e*", "axbxcxdxe", true, nil, true, true, 3}, + {"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil, true, true, 2}, + {"a*b*c*d*e*/f", "axbxcxdxexxx/f", true, nil, true, true, 2}, + {"a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false, nil, true, true, 2}, + {"a*b*c*d*e*/f", "axbxcxdxexxx/fff", false, nil, true, true, 2}, + {"a*b?c*x", "abxbbxdbxebxczzx", true, nil, true, true, 2}, + {"a*b?c*x", "abxbbxdbxebxczzy", false, nil, true, true, 2}, + {"ab[c]", "abc", true, nil, true, true, 1}, + {"ab[b-d]", "abc", 
true, nil, true, true, 1}, + {"ab[e-g]", "abc", false, nil, true, true, 0}, + {"ab[^c]", "abc", false, nil, true, true, 0}, + {"ab[^b-d]", "abc", false, nil, true, true, 0}, + {"ab[^e-g]", "abc", true, nil, true, true, 1}, + {"a\\*b", "ab", false, nil, true, true, 0}, + {"a?b", "a☺b", true, nil, true, true, 1}, + {"a[^a]b", "a☺b", true, nil, true, true, 1}, + {"a[!a]b", "a☺b", true, nil, false, true, 1}, + {"a???b", "a☺b", false, nil, true, true, 0}, + {"a[^a][^a][^a]b", "a☺b", false, nil, true, true, 0}, + {"[a-ζ]*", "α", true, nil, true, true, 17}, + {"*[a-ζ]", "A", false, nil, true, true, 17}, + {"a?b", "a/b", false, nil, true, true, 1}, + {"a*b", "a/b", false, nil, true, true, 1}, + {"[\\]a]", "]", true, nil, true, !onWindows, 2}, + {"[\\-]", "-", true, nil, true, !onWindows, 1}, + {"[x\\-]", "x", true, nil, true, !onWindows, 2}, + {"[x\\-]", "-", true, nil, true, !onWindows, 2}, + {"[x\\-]", "z", false, nil, true, !onWindows, 2}, + {"[\\-x]", "x", true, nil, true, !onWindows, 2}, + {"[\\-x]", "-", true, nil, true, !onWindows, 2}, + {"[\\-x]", "a", false, nil, true, !onWindows, 2}, + {"[]a]", "]", false, ErrBadPattern, true, true, 0}, + // doublestar, like bash, allows these when path.Match() does not + {"[-]", "-", true, nil, false, true, 1}, + {"[x-]", "x", true, nil, false, true, 2}, + {"[x-]", "-", true, nil, false, true, 2}, + {"[x-]", "z", false, nil, false, true, 2}, + {"[-x]", "x", true, nil, false, true, 2}, + {"[-x]", "-", true, nil, false, true, 2}, + {"[-x]", "a", false, nil, false, true, 2}, + {"[a-b-d]", "a", true, nil, false, true, 3}, + {"[a-b-d]", "b", true, nil, false, true, 3}, + {"[a-b-d]", "-", true, nil, false, true, 3}, + {"[a-b-d]", "c", false, nil, false, true, 3}, + {"[a-b-x]", "x", true, nil, false, true, 4}, + {"\\", "a", false, ErrBadPattern, true, !onWindows, 0}, + {"[", "a", false, ErrBadPattern, true, true, 0}, + {"[^", "a", false, ErrBadPattern, true, true, 0}, + {"[^bc", "a", false, ErrBadPattern, true, true, 0}, + {"a[", "a", 
false, ErrBadPattern, true, true, 0}, + {"a[", "ab", false, ErrBadPattern, true, true, 0}, + {"ad[", "ab", false, ErrBadPattern, true, true, 0}, + {"*x", "xxx", true, nil, true, true, 4}, + {"[abc]", "b", true, nil, true, true, 3}, + {"**", "", true, nil, false, false, 38}, + {"a/**", "a", true, nil, false, true, 7}, + {"a/**", "a/", true, nil, false, false, 7}, + {"a/**", "a/b", true, nil, false, true, 7}, + {"a/**", "a/b/c", true, nil, false, true, 7}, + {"**/c", "c", true, nil, false, true, 5}, + {"**/c", "b/c", true, nil, false, true, 5}, + {"**/c", "a/b/c", true, nil, false, true, 5}, + {"**/c", "a/b", false, nil, false, true, 5}, + {"**/c", "abcd", false, nil, false, true, 5}, + {"**/c", "a/abc", false, nil, false, true, 5}, + {"a/**/b", "a/b", true, nil, false, true, 2}, + {"a/**/c", "a/b/c", true, nil, false, true, 2}, + {"a/**/d", "a/b/c/d", true, nil, false, true, 1}, + {"a/\\**", "a/b/c", false, nil, false, !onWindows, 0}, // this is an odd case: filepath.Glob() will return results - {"a//b/c", "a/b/c", false, nil, true, false}, - {"a/b/c", "a/b//c", false, nil, true, true}, + {"a//b/c", "a/b/c", false, nil, true, false, 0}, + {"a/b/c", "a/b//c", false, nil, true, true, 1}, // also odd: Glob + filepath.Glob return results - {"a/", "a", false, nil, true, false}, - {"ab{c,d}", "abc", true, nil, false, true}, - {"ab{c,d,*}", "abcde", true, nil, false, true}, - {"ab{c,d}[", "abcd", false, ErrBadPattern, false, true}, - {"a{,bc}", "a", true, nil, false, true}, - {"a{,bc}", "abc", true, nil, false, true}, - {"a/{b/c,c/b}", "a/b/c", true, nil, false, true}, - {"a/{b/c,c/b}", "a/c/b", true, nil, false, true}, - {"{a/{b,c},abc}", "a/b", true, nil, false, true}, - {"{a/{b,c},abc}", "a/c", true, nil, false, true}, - {"{a/{b,c},abc}", "abc", true, nil, false, true}, - {"{a/{b,c},abc}", "a/b/c", false, nil, false, true}, - {"{a/ab*}", "a/abc", true, nil, false, true}, - {"{a/*}", "a/b", true, nil, false, true}, - {"{a/abc}", "a/abc", true, nil, false, true}, - 
{"{a/b,a/c}", "a/c", true, nil, false, true}, - {"abc/**", "abc/b", true, nil, false, true}, - {"**/abc", "abc", true, nil, false, true}, - {"abc**", "abc/b", false, nil, false, true}, - {"**/*.txt", "abc/【test】.txt", true, nil, false, true}, - {"**/【*", "abc/【test】.txt", true, nil, false, true}, - {"broken-symlink", "broken-symlink", true, nil, true, !onWindows}, - {"working-symlink/c/*", "working-symlink/c/d", true, nil, true, !onWindows}, - {"working-sym*/*", "working-symlink/c", true, nil, true, !onWindows}, - {"b/**/f", "b/symlink-dir/f", true, nil, false, !onWindows}, + {"a/", "a", false, nil, true, false, 0}, + {"ab{c,d}", "abc", true, nil, false, true, 1}, + {"ab{c,d,*}", "abcde", true, nil, false, true, 5}, + {"ab{c,d}[", "abcd", false, ErrBadPattern, false, true, 0}, + {"a{,bc}", "a", true, nil, false, true, 2}, + {"a{,bc}", "abc", true, nil, false, true, 2}, + {"a/{b/c,c/b}", "a/b/c", true, nil, false, true, 2}, + {"a/{b/c,c/b}", "a/c/b", true, nil, false, true, 2}, + {"{a/{b,c},abc}", "a/b", true, nil, false, true, 3}, + {"{a/{b,c},abc}", "a/c", true, nil, false, true, 3}, + {"{a/{b,c},abc}", "abc", true, nil, false, true, 3}, + {"{a/{b,c},abc}", "a/b/c", false, nil, false, true, 3}, + {"{a/ab*}", "a/abc", true, nil, false, true, 1}, + {"{a/*}", "a/b", true, nil, false, true, 3}, + {"{a/abc}", "a/abc", true, nil, false, true, 1}, + {"{a/b,a/c}", "a/c", true, nil, false, true, 2}, + {"abc/**", "abc/b", true, nil, false, true, 3}, + {"**/abc", "abc", true, nil, false, true, 2}, + {"abc**", "abc/b", false, nil, false, true, 3}, + {"**/*.txt", "abc/【test】.txt", true, nil, false, true, 1}, + {"**/【*", "abc/【test】.txt", true, nil, false, true, 1}, + // unfortunately, io/fs can't handle this, so neither can Glob =( + {"broken-symlink", "broken-symlink", true, nil, true, false, 1}, + {"working-symlink/c/*", "working-symlink/c/d", true, nil, true, !onWindows, 1}, + {"working-sym*/*", "working-symlink/c", true, nil, true, !onWindows, 1}, + {"b/**/f", 
"b/symlink-dir/f", true, nil, false, !onWindows, 2}, +} + +func TestValidatePattern(t *testing.T) { + for idx, tt := range matchTests { + testValidatePatternWith(t, idx, tt) + } +} + +func testValidatePatternWith(t *testing.T, idx int, tt MatchTest) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. Validate(%#q) panicked: %#v", idx, tt.pattern, r) + } + }() + + result := ValidatePattern(tt.pattern) + if result != (tt.expectedErr == nil) { + t.Errorf("#%v. ValidatePattern(%#q) = %v want %v", idx, tt.pattern, result, !result) + } } func TestMatch(t *testing.T) { @@ -243,87 +271,108 @@ func BenchmarkGoPathMatch(b *testing.B) { } func TestGlob(t *testing.T) { - abspath, err := os.Getwd() - if err != nil { - t.Errorf("Error getting current working directory: %v", err) - return + fsys := os.DirFS("test") + for idx, tt := range matchTests { + if tt.testOnDisk { + testGlobWith(t, idx, tt, fsys) + } } +} - abspath = filepath.Join(abspath, "test") +func testGlobWith(t *testing.T, idx int, tt MatchTest, fsys fs.FS) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. 
Glob(%#q) panicked: %#v", idx, tt.pattern, r) + } + }() - abspathWithoutVolume := "" - volumeName := filepath.VolumeName(abspath) - if volumeName != "" && !strings.HasPrefix(volumeName, `\\`) { - abspathWithoutVolume = strings.TrimPrefix(abspath, volumeName) - } + matches, err := Glob(fsys, tt.pattern) + verifyGlobResults(t, idx, "Glob", tt, fsys, matches, err) +} +func TestGlobWalk(t *testing.T) { + fsys := os.DirFS("test") for idx, tt := range matchTests { if tt.testOnDisk { - // test both relative paths and absolute paths - testGlobWith(t, idx, tt, "test") - testGlobWith(t, idx, tt, abspath) - if abspathWithoutVolume != "" { - testGlobWith(t, idx, tt, abspathWithoutVolume) - } + testGlobWalkWith(t, idx, tt, fsys) } } } -func testGlobWith(t *testing.T, idx int, tt MatchTest, basepath string) { +func testGlobWalkWith(t *testing.T, idx int, tt MatchTest, fsys fs.FS) { defer func() { if r := recover(); r != nil { t.Errorf("#%v. Glob(%#q) panicked: %#v", idx, tt.pattern, r) } }() - pattern := joinWithoutClean(basepath, filepath.FromSlash(tt.pattern)) - testPath := joinWithoutClean(basepath, filepath.FromSlash(tt.testPath)) - matches, err := Glob(pattern) - if inSlice(testPath, matches) != tt.shouldMatch { + var matches []string + err := GlobWalk(fsys, tt.pattern, func(p string, d fs.DirEntry) error { + matches = append(matches, p) + return nil + }) + verifyGlobResults(t, idx, "GlobWalk", tt, fsys, matches, err) +} + +func verifyGlobResults(t *testing.T, idx int, fn string, tt MatchTest, fsys fs.FS, matches []string, err error) { + if len(matches) != tt.numResults { + t.Errorf("#%v. %v(%#q) = %#v - should have %#v results", idx, fn, tt.pattern, matches, tt.numResults) + } + if inSlice(tt.testPath, matches) != tt.shouldMatch { if tt.shouldMatch { - t.Errorf("#%v. Glob(%#q) = %#v - doesn't contain %v, but should", idx, pattern, matches, tt.testPath) + t.Errorf("#%v. 
%v(%#q) = %#v - doesn't contain %v, but should", idx, fn, tt.pattern, matches, tt.testPath) } else { - t.Errorf("#%v. Glob(%#q) = %#v - contains %v, but shouldn't", idx, pattern, matches, tt.testPath) + t.Errorf("#%v. %v(%#q) = %#v - contains %v, but shouldn't", idx, fn, tt.pattern, matches, tt.testPath) } } if err != tt.expectedErr { - t.Errorf("#%v. Glob(%#q) has error %v, but should be %v", idx, pattern, err, tt.expectedErr) + t.Errorf("#%v. %v(%#q) has error %v, but should be %v", idx, fn, tt.pattern, err, tt.expectedErr) } if tt.isStandard { - stdMatches, stdErr := filepath.Glob(pattern) + stdMatches, stdErr := fs.Glob(fsys, tt.pattern) if !compareSlices(matches, stdMatches) || !compareErrors(err, stdErr) { - t.Errorf("#%v. Glob(%#q) != filepath.Glob(...). Got %#v, %v want %#v, %v", idx, pattern, matches, err, stdMatches, stdErr) + t.Errorf("#%v. %v(%#q) != fs.Glob(...). Got %#v, %v want %#v, %v", idx, fn, tt.pattern, matches, err, stdMatches, stdErr) } } } func BenchmarkGlob(b *testing.B) { + fsys := os.DirFS("test") b.ReportAllocs() for i := 0; i < b.N; i++ { for _, tt := range matchTests { if tt.isStandard && tt.testOnDisk { - pattern := joinWithoutClean("test", filepath.FromSlash(tt.pattern)) - Glob(pattern) + Glob(fsys, tt.pattern) } } } } -func BenchmarkGoGlob(b *testing.B) { +func BenchmarkGlobWalk(b *testing.B) { + fsys := os.DirFS("test") b.ReportAllocs() for i := 0; i < b.N; i++ { for _, tt := range matchTests { if tt.isStandard && tt.testOnDisk { - pattern := joinWithoutClean("test", filepath.FromSlash(tt.pattern)) - filepath.Glob(pattern) + GlobWalk(fsys, tt.pattern, func(p string, d fs.DirEntry) error { + return nil + }) } } } } -func joinWithoutClean(elem ...string) string { - return strings.Join(elem, string(os.PathSeparator)) +func BenchmarkGoGlob(b *testing.B) { + fsys := os.DirFS("test") + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, tt := range matchTests { + if tt.isStandard && tt.testOnDisk { + fs.Glob(fsys, tt.pattern) + } + } + 
 } } func compareErrors(a, b error) bool { @@ -393,8 +442,9 @@ func symlink(oldname, newname string) { } func TestGlobSorted(t *testing.T) { + fsys := os.DirFS("test") expected := []string{"a", "abc", "abcd", "abcde", "abxbbxdbxebxczzx", "abxbbxdbxebxczzy", "axbxcxdxe", "axbxcxdxexxx", "a☺b"} - matches, err := Glob(joinWithoutClean("test", "a*")) + matches, err := Glob(fsys, "a*") if err != nil { t.Errorf("Unexpected error %v", err) return @@ -405,7 +455,7 @@ func TestGlobSorted(t *testing.T) { return } for idx, match := range matches { - if match != joinWithoutClean("test", expected[idx]) { + if match != expected[idx] { t.Errorf("Glob returned %#v; expected %#v", matches, expected) return } diff --git a/examples/find.go b/examples/find.go index ebddbca..7cb507b 100644 --- a/examples/find.go +++ b/examples/find.go @@ -5,20 +5,32 @@ import ( "os" "strings" - "github.com/bmatcuk/doublestar/v3" + "github.com/bmatcuk/doublestar/v4" ) // To run: // $ go run find.go - +// // For example: -// $ go run find.go '/usr/bin/*' # Make sure to escape as necessary for your shell +// $ go run find.go '/usr/bin/*' +// +// Make sure to escape the pattern as necessary for your shell, otherwise the +// shell will expand the pattern! Additionally, you should use `/` as the path +// separator even if your OS (like Windows) does not! +// +// Patterns that include `.` or `..` after any meta characters (*, ?, [, or {) +// will not work because io/fs will reject them. If they appear _before_ any +// meta characters _and_ before a `/`, the `doublestar.SplitPattern` call below will +// take care of them correctly. 
func main() { pattern := os.Args[1] fmt.Printf("Searching on disk for pattern: %s\n\n", pattern) - matches, err := doublestar.Glob(pattern) + var basepath string + basepath, pattern = doublestar.SplitPattern(pattern) + fsys := os.DirFS(basepath) + matches, err := doublestar.Glob(fsys, pattern) if err != nil { fmt.Printf("Error: %v", err) os.Exit(1) diff --git a/examples/go.mod b/examples/go.mod new file mode 100644 index 0000000..28f7b84 --- /dev/null +++ b/examples/go.mod @@ -0,0 +1,9 @@ +module github.com/bmatcuk/doublestar/examples/find + +replace github.com/bmatcuk/doublestar/v4 => ../ + +require ( + github.com/bmatcuk/doublestar/v4 v4.0.0 +) + +go 1.16 diff --git a/glob.go b/glob.go new file mode 100644 index 0000000..d84813d --- /dev/null +++ b/glob.go @@ -0,0 +1,391 @@ +package doublestar + +import ( + "io/fs" + "path" +) + +// Glob returns the names of all files matching pattern or nil if there is no +// matching file. The syntax of pattern is the same as in Match(). The pattern +// may describe hierarchical names such as usr/*/bin/ed. +// +// Glob ignores file system errors such as I/O errors reading directories. +// The only possible returned error is ErrBadPattern, reporting that the +// pattern is malformed. +// +// Note: this is meant as a drop-in replacement for io/fs.Glob(). Like +// io/fs.Glob(), this function assumes that your pattern uses `/` as the path +// separator even if that's not correct for your OS (like Windows). If you +// aren't sure if that's the case, you can use filepath.ToSlash() on your +// pattern before calling Glob(). +// +func Glob(fsys fs.FS, pattern string) ([]string, error) { + if !ValidatePattern(pattern) { + return nil, ErrBadPattern + } + if hasMidDoubleStar(pattern) { + // If the pattern has a `**` anywhere but the very end, GlobWalk is more + // performant because it can get away with less allocations. 
If the pattern + // ends in a `**`, both methods are pretty much the same, but Glob has a + // _very_ slight advantage because of lower function call overhead. + var matches []string + err := doGlobWalk(fsys, pattern, true, func(p string, d fs.DirEntry) error { + matches = append(matches, p) + return nil + }) + return matches, err + } + return doGlob(fsys, pattern, nil, true) +} + +// Does the actual globbin' +func doGlob(fsys fs.FS, pattern string, m []string, firstSegment bool) (matches []string, err error) { + matches = m + patternStart := indexMeta(pattern) + if patternStart == -1 { + // pattern doesn't contain any meta characters - does a file matching the + // pattern exist? + if exists(fsys, pattern) { + matches = append(matches, pattern) + return + } else { + return + } + } + + dir := "." + splitIdx := lastIndexSlashOrAlt(pattern) + if splitIdx != -1 { + if pattern[splitIdx] == '}' { + openingIdx := indexMatchedOpeningAlt(pattern[:splitIdx]) + if openingIdx == -1 { + // if there's no matching opening index, technically Match() will treat + // an unmatched `}` as nothing special, so... we will, too! + splitIdx = lastIndexSlash(pattern[:splitIdx]) + } else { + // otherwise, we have to handle the alts: + return globAlts(fsys, pattern, openingIdx, splitIdx, matches, firstSegment) + } + } + + dir = pattern[:splitIdx] + pattern = pattern[splitIdx+1:] + } + + // if `splitIdx` is less than `patternStart`, we know `dir` has no meta + // characters. They would be equal if they are both -1, which means `dir` + // will be ".", and we know that doesn't have meta characters either. 
+ if splitIdx <= patternStart{ + return globDir(fsys, dir, pattern, matches, firstSegment) + } + + var dirs []string + dirs, err = doGlob(fsys, dir, matches, false) + if err != nil { + return + } + for _, d := range dirs { + matches, err = globDir(fsys, d, pattern, matches, firstSegment) + if err != nil { + return + } + } + + return +} + +// handle alts in the glob pattern - `openingIdx` and `closingIdx` are the +// indexes of `{` and `}`, respectively +func globAlts(fsys fs.FS, pattern string, openingIdx, closingIdx int, m []string, firstSegment bool) (matches []string, err error) { + matches = m + + var dirs []string + startIdx := 0 + afterIdx := closingIdx + 1 + splitIdx := lastIndexSlashOrAlt(pattern[:openingIdx]) + if splitIdx == -1 || pattern[splitIdx] == '}' { + // no common prefix + dirs = []string{""} + } else { + // our alts have a common prefix that we can process first + dirs, err = doGlob(fsys, pattern[:splitIdx], matches, false) + if err != nil { + return + } + + startIdx = splitIdx + 1 + } + + for _, d := range dirs { + patIdx := openingIdx + 1 + altResultsStartIdx := len(matches) + thisResultStartIdx := altResultsStartIdx + for patIdx < closingIdx { + nextIdx := indexNextAlt(pattern[patIdx:closingIdx], true) + if nextIdx == -1 { + nextIdx = closingIdx + } else { + nextIdx += patIdx + } + + alt := buildAlt(d, pattern, startIdx, openingIdx, patIdx, nextIdx, afterIdx) + matches, err = doGlob(fsys, alt, matches, firstSegment) + if err != nil { + return + } + + matchesLen := len(matches) + if altResultsStartIdx != thisResultStartIdx && thisResultStartIdx != matchesLen { + // Alts can result in matches that aren't sorted, or, worse, duplicates + // (consider the trivial pattern `path/to/{a,*}`). Since doGlob returns + // sorted results, we can do a sort of in-place merge and remove + // duplicates. 
But, we only need to do this if this isn't the first alt + // (ie, `altResultsStartIdx != thisResultStartIdx`) and if the latest + // alt actually added some matches (`thisResultStartIdx != + // len(matches)`) + matches = sortAndRemoveDups(matches, altResultsStartIdx, thisResultStartIdx, matchesLen) + + // length of matches may have changed + thisResultStartIdx = len(matches) + } else { + thisResultStartIdx = matchesLen + } + + patIdx = nextIdx + 1 + } + } + + return +} + +// find files/subdirectories in the given `dir` that match `pattern` +func globDir(fsys fs.FS, dir, pattern string, matches []string, canMatchFiles bool) (m []string, e error) { + m = matches + + if pattern == "" { + // pattern can be an empty string if the original pattern ended in a slash, + // in which case, we should just return dir, but only if it actually exists + // and it's a directory (or a symlink to a directory) + if isPathDir(fsys, dir) { + m = append(m, dir) + } + return + } + + if pattern == "**" { + m = globDoubleStar(fsys, dir, m, canMatchFiles) + return + } + + dirs, err := fs.ReadDir(fsys, dir) + if err != nil { + // ignore IO errors + return + } + + var matched bool + for _, info := range dirs { + name := info.Name() + if canMatchFiles || isDir(fsys, dir, name, info) { + matched, e = matchWithSeparator(pattern, name, '/', false) + if e != nil { + return + } + if matched { + m = append(m, path.Join(dir, name)) + } + } + } + + return +} + +func globDoubleStar(fsys fs.FS, dir string, matches []string, canMatchFiles bool) []string { + dirs, err := fs.ReadDir(fsys, dir) + if err != nil { + // ignore IO errors + return matches + } + + // `**` can match *this* dir, so add it + matches = append(matches, dir) + for _, info := range dirs { + name := info.Name() + if isDir(fsys, dir, name, info) { + matches = globDoubleStar(fsys, path.Join(dir, name), matches, canMatchFiles) + } else if canMatchFiles { + matches = append(matches, path.Join(dir, name)) + } + } + + return matches +} + +// 
Returns true if the pattern has a doublestar in the middle of the pattern. +// In this case, GlobWalk is faster because it can get away with less +// allocations. However, Glob has a _very_ slight edge if the pattern ends in +// `**`. +func hasMidDoubleStar(p string) bool { + // subtract 3: 2 because we want to return false if the pattern ends in `**` + // (Glob is _very_ slightly faster in that case), and the extra 1 because our + // loop checks p[i] and p[i+1]. + l := len(p) - 3 + for i := 0; i < l; i++ { + if p[i] == '\\' { + // escape next byte + i++ + } else if p[i] == '*' && p[i+1] == '*' { + return true + } + } + return false +} + +// Returns the index of the first unescaped meta character, or negative 1. +func indexMeta(s string) int { + var c byte + l := len(s) + for i := 0; i < l; i++ { + c = s[i] + if c == '*' || c == '?' || c == '[' || c == '{' { + return i + } else if c == '\\' { + // skip next byte + i++ + } + } + return -1 +} + +// Returns the index of the last unescaped slash or closing alt (`}`) in the +// string, or negative 1. +func lastIndexSlashOrAlt(s string) int { + for i := len(s) - 1; i >= 0; i-- { + if (s[i] == '/' || s[i] == '}') && (i == 0 || s[i-1] != '\\') { + return i + } + } + return -1 +} + +// Returns the index of the last unescaped slash in the string, or negative 1. +func lastIndexSlash(s string) int { + for i := len(s) - 1; i >= 0; i-- { + if s[i] == '/' && (i == 0 || s[i-1] != '\\') { + return i + } + } + return -1 +} + +// Assuming the byte after the end of `s` is a closing `}`, this function will +// find the index of the matching `{`. That is, it'll skip over any nested `{}` +// and account for escaping. 
+func indexMatchedOpeningAlt(s string) int { + alts := 1 + for i := len(s) - 1; i >= 0; i-- { + if s[i] == '}' && (i == 0 || s[i-1] != '\\') { + alts++ + } else if s[i] == '{' && (i == 0 || s[i-1] != '\\') { + if alts--; alts == 0 { + return i + } + } + } + return -1 +} + +// Returns true if the path exists +func exists(fsys fs.FS, name string) bool { + if _, err := fs.Stat(fsys, name); err != nil { + return false + } + return true +} + +// Returns true if the path is a directory, or a symlink to a directory +func isPathDir(fsys fs.FS, name string) bool { + info, err := fs.Stat(fsys, name) + if err != nil { + return false + } + return info.IsDir() +} + +// Returns whether or not the given DirEntry is a directory. If the DirEntry +// represents a symbolic link, the link is followed by running fs.Stat() on +// `path.Join(dir, name)` +func isDir(fsys fs.FS, dir string, name string, info fs.DirEntry) bool { + if (info.Type() & fs.ModeSymlink) > 0 { + finfo, err := fs.Stat(fsys, path.Join(dir, name)) + if err != nil { + return false + } + return finfo.IsDir() + } + return info.IsDir() +} + +// Builds a string from an alt +func buildAlt(prefix, pattern string, startIdx, openingIdx, currentIdx, nextIdx, afterIdx int) string { + // pattern: + // ignored/start{alts,go,here}remaining - len = 36 + // | | | | ^--- afterIdx = 27 + // | | | \--------- nextIdx = 21 + // | | \----------- currentIdx = 19 + // | \----------------- openingIdx = 13 + // \---------------------- startIdx = 8 + // + // result: + // prefix/startgoremaining - len = 7 + 5 + 2 + 9 = 23 + var buf []byte + patLen := len(pattern) + size := (openingIdx - startIdx) + (nextIdx - currentIdx) + (patLen - afterIdx) + if prefix != "" { + buf = make([]byte, 0, size + len(prefix) + 1) + buf = append(buf, prefix...) + buf = append(buf, '/') + } else { + buf = make([]byte, 0, size) + } + buf = append(buf, pattern[startIdx:openingIdx]...) + buf = append(buf, pattern[currentIdx:nextIdx]...) 
+ if afterIdx < patLen { + buf = append(buf, pattern[afterIdx:]...) + } + return string(buf) +} + +// Running alts can produce results that are not sorted, and, worse, can cause +// duplicates (consider the trivial pattern `path/to/{a,*}`). Since we know +// each run of doGlob is sorted, we can basically do the "merge" step of a +// merge sort in-place. +func sortAndRemoveDups(matches []string, idx1, idx2, l int) []string { + var tmp string + for ; idx1 < idx2; idx1++ { + if matches[idx1] < matches[idx2] { + // order is correct + continue + } else if matches[idx1] > matches[idx2] { + // need to swap and then re-sort matches above idx2 + tmp = matches[idx1] + matches[idx1] = matches[idx2] + + shft := idx2 + 1 + for ; shft < l && matches[shft] < tmp; shft++ { + matches[shft - 1] = matches[shft] + } + matches[shft - 1] = tmp + } else { + // duplicate - shift matches above idx2 down one and decrement l + for shft := idx2 + 1; shft < l; shft++ { + matches[shft - 1] = matches[shft] + } + if l--; idx2 == l { + // nothing left to do... matches[idx2:] must have been full of dups + break + } + } + } + return matches[:l] +} diff --git a/globwalk.go b/globwalk.go new file mode 100644 index 0000000..f536576 --- /dev/null +++ b/globwalk.go @@ -0,0 +1,273 @@ +package doublestar + +import ( + "io/fs" + "path" +) + +// Callback function for GlobWalk(). If the function returns an error, GlobWalk +// will end immediately and return the same error. +type GlobWalkFunc func(path string, d fs.DirEntry) error + +// GlobWalk calls the callback function `fn` for every file matching pattern. +// The syntax of pattern is the same as in Match(). The pattern may describe +// hierarchical names such as usr/*/bin/ed. +// +// GlobWalk may have a small performance benefit over Glob if you do not need a +// slice of matches because it can avoid allocating memory for the matches. 
+// Additionally, GlobWalk gives you access to the `fs.DirEntry` objects for +// each match, and lets you quit early by returning a non-nil error from your +// callback function. +// +// GlobWalk ignores file system errors such as I/O errors reading directories. +// GlobWalk may return ErrBadPattern, reporting that the pattern is malformed. +// Additionally, if the callback function `fn` returns an error, GlobWalk will +// exit immediately and return that error. +// +// Like Glob(), this function assumes that your pattern uses `/` as the path +// separator even if that's not correct for your OS (like Windows). If you +// aren't sure if that's the case, you can use filepath.ToSlash() on your +// pattern before calling GlobWalk(). +// +func GlobWalk(fsys fs.FS, pattern string, fn GlobWalkFunc) error { + if !ValidatePattern(pattern) { + return ErrBadPattern + } + return doGlobWalk(fsys, pattern, true, fn) +} + +// Actually execute GlobWalk +func doGlobWalk(fsys fs.FS, pattern string, firstSegment bool, fn GlobWalkFunc) error { + patternStart := indexMeta(pattern) + if patternStart == -1 { + // pattern doesn't contain any meta characters - does a file matching the + // pattern exist? + info, err := fs.Stat(fsys, pattern) + if err == nil { + err = fn(pattern, dirEntryFromFileInfo(info)) + return err + } else { + // ignore IO errors + return nil + } + } + + dir := "." + splitIdx := lastIndexSlashOrAlt(pattern) + if splitIdx != -1 { + if pattern[splitIdx] == '}' { + openingIdx := indexMatchedOpeningAlt(pattern[:splitIdx]) + if openingIdx == -1 { + // if there's no matching opening index, technically Match() will treat + // an unmatched `}` as nothing special, so... we will, too! 
+ splitIdx = lastIndexSlash(pattern[:splitIdx]) + } else { + // otherwise, we have to handle the alts: + return globAltsWalk(fsys, pattern, openingIdx, splitIdx, firstSegment, fn) + } + } + + dir = pattern[:splitIdx] + pattern = pattern[splitIdx+1:] + } + + // if `splitIdx` is less than `patternStart`, we know `dir` has no meta + // characters. They would be equal if they are both -1, which means `dir` + // will be ".", and we know that doesn't have meta characters either. + if splitIdx <= patternStart { + return globDirWalk(fsys, dir, pattern, firstSegment, fn) + } + + return doGlobWalk(fsys, dir, false, func(p string, d fs.DirEntry) error { + if err := globDirWalk(fsys, p, pattern, firstSegment, fn); err != nil { + return err + } + return nil + }) +} + +// handle alts in the glob pattern - `openingIdx` and `closingIdx` are the +// indexes of `{` and `}`, respectively +func globAltsWalk(fsys fs.FS, pattern string, openingIdx, closingIdx int, firstSegment bool, fn GlobWalkFunc) (err error) { + var matches []DirEntryWithFullPath + startIdx := 0 + afterIdx := closingIdx + 1 + splitIdx := lastIndexSlashOrAlt(pattern[:openingIdx]) + if splitIdx == -1 || pattern[splitIdx] == '}' { + // no common prefix + matches, err = doGlobAltsWalk(fsys, "", pattern, startIdx, openingIdx, closingIdx, afterIdx, firstSegment, matches) + if err != nil { + return + } + } else { + // our alts have a common prefix that we can process first + startIdx = splitIdx + 1 + err = doGlobWalk(fsys, pattern[:splitIdx], false, func(p string, d fs.DirEntry) (e error) { + matches, e = doGlobAltsWalk(fsys, p, pattern, startIdx, openingIdx, closingIdx, afterIdx, firstSegment, matches) + return e + }) + if err != nil { + return + } + } + + for _, m := range matches { + if err = fn(m.Path, m.Entry); err != nil { + return + } + } + + return +} + +// runs actual matching for alts +func doGlobAltsWalk(fsys fs.FS, d, pattern string, startIdx, openingIdx, closingIdx, afterIdx int, firstSegment bool, m 
[]DirEntryWithFullPath) (matches []DirEntryWithFullPath, err error) { + matches = m + matchesLen := len(m) + patIdx := openingIdx + 1 + for patIdx < closingIdx { + nextIdx := indexNextAlt(pattern[patIdx:closingIdx], true) + if nextIdx == -1 { + nextIdx = closingIdx + } else { + nextIdx += patIdx + } + + alt := buildAlt(d, pattern, startIdx, openingIdx, patIdx, nextIdx, afterIdx) + err = doGlobWalk(fsys, alt, firstSegment, func(p string, d fs.DirEntry) error { + // insertion sort, ignoring dups + insertIdx := matchesLen + for insertIdx > 0 && matches[insertIdx-1].Path > p { + insertIdx-- + } + if insertIdx > 0 && matches[insertIdx-1].Path == p { + // dup + return nil + } + + // append to grow the slice, then insert + entry := DirEntryWithFullPath{d, p} + matches = append(matches, entry) + for i := matchesLen; i > insertIdx; i-- { + matches[i] = matches[i-1] + } + matches[insertIdx] = entry + matchesLen++ + + return nil + }) + if err != nil { + return + } + + patIdx = nextIdx + 1 + } + + return +} + +func globDirWalk(fsys fs.FS, dir, pattern string, canMatchFiles bool, fn GlobWalkFunc) (e error) { + if pattern == "" { + // pattern can be an empty string if the original pattern ended in a slash, + // in which case, we should just return dir, but only if it actually exists + // and it's a directory (or a symlink to a directory) + info, err := fs.Stat(fsys, dir) + if err != nil || !info.IsDir() { + return nil + } + return fn(dir, dirEntryFromFileInfo(info)) + } + + if pattern == "**" { + // `**` can match *this* dir + info, err := fs.Stat(fsys, dir) + if err != nil || !info.IsDir() { + return nil + } + if e = fn(dir, dirEntryFromFileInfo(info)); e != nil { + return + } + return globDoubleStarWalk(fsys, dir, canMatchFiles, fn) + } + + dirs, err := fs.ReadDir(fsys, dir) + if err != nil { + // ignore IO errors + return nil + } + + var matched bool + for _, info := range dirs { + name := info.Name() + if canMatchFiles || isDir(fsys, dir, name, info) { + matched, e = 
matchWithSeparator(pattern, name, '/', false) + if e != nil { + return + } + if matched { + if e = fn(path.Join(dir, name), info); e != nil { + return + } + } + } + } + + return +} + +func globDoubleStarWalk(fsys fs.FS, dir string, canMatchFiles bool, fn GlobWalkFunc) (e error) { + dirs, err := fs.ReadDir(fsys, dir) + if err != nil { + // ignore IO errors + return + } + + // `**` can match *this* dir, so add it + for _, info := range dirs { + name := info.Name() + if isDir(fsys, dir, name, info) { + p := path.Join(dir, name) + if e = fn(p, info); e != nil { + return + } + if e = globDoubleStarWalk(fsys, p, canMatchFiles, fn); e != nil { + return + } + } else if canMatchFiles { + if e = fn(path.Join(dir, name), info); e != nil { + return + } + } + } + + return +} + +type DirEntryFromFileInfo struct { + fi fs.FileInfo +} + +func (d *DirEntryFromFileInfo) Name() string { + return d.fi.Name() +} + +func (d *DirEntryFromFileInfo) IsDir() bool { + return d.fi.IsDir() +} + +func (d *DirEntryFromFileInfo) Type() fs.FileMode { + return d.fi.Mode().Type() +} + +func (d *DirEntryFromFileInfo) Info() (fs.FileInfo, error) { + return d.fi, nil +} + +func dirEntryFromFileInfo(fi fs.FileInfo) fs.DirEntry { + return &DirEntryFromFileInfo{fi} +} + +type DirEntryWithFullPath struct { + Entry fs.DirEntry + Path string +} diff --git a/go.mod b/go.mod index cf77e57..2d915a4 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ -module github.com/bmatcuk/doublestar/v3 +module github.com/bmatcuk/doublestar/v4 -go 1.12 +go 1.16 diff --git a/match.go b/match.go new file mode 100644 index 0000000..f616126 --- /dev/null +++ b/match.go @@ -0,0 +1,369 @@ +package doublestar + +import ( + "path/filepath" + "unicode/utf8" +) + +// Match reports whether name matches the shell pattern. +// The pattern syntax is: +// +// pattern: +// { term } +// term: +// '*' matches any sequence of non-path-separators +// '/**/' matches zero or more directories +// '?' 
matches any single non-path-separator character +// '[' [ '^' '!' ] { character-range } ']' +// character class (must be non-empty) +// starting with `^` or `!` negates the class +// '{' { term } [ ',' { term } ... ] '}' +// alternatives +// c matches character c (c != '*', '?', '\\', '[') +// '\\' c matches character c +// +// character-range: +// c matches character c (c != '\\', '-', ']') +// '\\' c matches character c +// lo '-' hi matches character c for lo <= c <= hi +// +// Match returns true if `name` matches the file name `pattern`. `name` and +// `pattern` are split on forward slash (`/`) characters and may be relative or +// absolute. +// +// Match requires pattern to match all of name, not just a substring. +// The only possible returned error is ErrBadPattern, when pattern +// is malformed. +// +// A doublestar (`**`) should appear surrounded by path separators such as +// `/**/`. A mid-pattern doublestar (`**`) behaves like bash's globstar +// option: a pattern such as `path/to/**.txt` would return the same results as +// `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`. +// +// Note: this is meant as a drop-in replacement for path.Match() which +// always uses '/' as the path separator. If you want to support systems +// which use a different path separator (such as Windows), what you want +// is PathMatch(). Alternatively, you can run filepath.ToSlash() on both +// pattern and name and then use this function. +// +func Match(pattern, name string) (bool, error) { + return matchWithSeparator(pattern, name, '/', true) +} + +// PathMatch returns true if `name` matches the file name `pattern`. The +// difference between Match and PathMatch is that PathMatch will automatically +// use your system's path separator to split `name` and `pattern`. On systems +// where the path separator is `'\'`, escaping will be disabled. +// +// Note: this is meant as a drop-in replacement for filepath.Match(). 
It +// assumes that both `pattern` and `name` are using the system's path +// separator. If you can't be sure of that, use filepath.ToSlash() on both +// `pattern` and `name`, and then use the Match() function instead. +// +func PathMatch(pattern, name string) (bool, error) { + return matchWithSeparator(filepath.ToSlash(pattern), name, filepath.Separator, true) +} + +func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) { + doublestarPatternBacktrack := -1 + doublestarNameBacktrack := -1 + starPatternBacktrack := -1 + starNameBacktrack := -1 + patIdx := 0 + nameIdx := 0 + patLen := len(pattern) + nameLen := len(name) + startOfSegment := true +MATCH: + for nameIdx < nameLen { + if patIdx < patLen { + switch pattern[patIdx] { + case '*': + if patIdx++; patIdx < patLen && pattern[patIdx] == '*' { + // doublestar - must begin with a path separator, otherwise we'll + // treat it like a single star like bash + patIdx++ + if startOfSegment { + if patIdx >= patLen { + // pattern ends in `/**`: return true + return true, nil + } + + // doublestar must also end with a path separator, otherwise we're + // just going to treat the doublestar as a single star like bash + patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) + if patRune == separator { + patIdx += patRuneLen + + doublestarPatternBacktrack = patIdx + doublestarNameBacktrack = nameIdx + starPatternBacktrack = -1 + starNameBacktrack = -1 + continue + } + } + } + startOfSegment = false + + starPatternBacktrack = patIdx + starNameBacktrack = nameIdx + continue + + case '?': + startOfSegment = false + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + if nameRune == separator { + // `?` cannot match the separator + break + } + + patIdx++ + nameIdx += nameRuneLen + continue + + case '[': + startOfSegment = false + if patIdx++; patIdx >= patLen { + // class didn't end + return false, ErrBadPattern + } + nameRune, nameRuneLen := 
utf8.DecodeRuneInString(name[nameIdx:]) + + matched := false + negate := pattern[patIdx] == '!' || pattern[patIdx] == '^' + if negate { + patIdx++ + } + + if patIdx >= patLen || pattern[patIdx] == ']' { + // class didn't end or empty character class + return false, ErrBadPattern + } + + last := utf8.MaxRune + for patIdx < patLen && pattern[patIdx] != ']' { + patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) + patIdx += patRuneLen + + // match a range + if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' { + if separator != '\\' && pattern[patIdx] == '\\' { + // next character is escaped + patIdx++ + } + patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:]) + patIdx += patRuneLen + + if last <= nameRune && nameRune <= patRune { + matched = true + break + } + + // didn't match range - reset `last` + last = utf8.MaxRune + continue + } + + // not a range - check if the next rune is escaped + if separator != '\\' && patRune == '\\' { + patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:]) + patIdx += patRuneLen + } + + // check if the rune matches + if patRune == nameRune { + matched = true + break + } + + // no matches yet + last = patRune + } + + if matched == negate { + // failed to match - if we reached the end of the pattern, that means + // we never found a closing `]` + if patIdx >= patLen { + return false, ErrBadPattern + } + break + } + + closingIdx := indexUnescapedByte(pattern[patIdx:], ']', separator != '\\') + if closingIdx == -1 { + // no closing `]` + return false, ErrBadPattern + } + + patIdx += closingIdx + 1 + nameIdx += nameRuneLen + continue + + case '{': + startOfSegment = false + patIdx++ + closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\') + if closingIdx == -1 { + // no closing `}` + return false, ErrBadPattern + } + closingIdx += patIdx + + for ;; { + commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\') + if commaIdx == -1 { + break + } 
+ commaIdx += patIdx + + result, err := matchWithSeparator(pattern[patIdx:commaIdx] + pattern[closingIdx+1:], name[nameIdx:], separator, validate) + if result || err != nil { + return result, err + } + + patIdx = commaIdx + 1 + } + return matchWithSeparator(pattern[patIdx:closingIdx] + pattern[closingIdx+1:], name[nameIdx:], separator, validate) + + case '\\': + if separator != '\\' { + // next rune is "escaped" in the pattern - literal match + if patIdx++; patIdx >= patLen { + // pattern ended + return false, ErrBadPattern + } + } + fallthrough + + default: + patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + if patRune != nameRune { + break + } + + patIdx += patRuneLen + nameIdx += nameRuneLen + startOfSegment = patRune == separator + continue + } + } + + if starPatternBacktrack >= 0 { + // `*` backtrack, but only if the `name` rune isn't the separator + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:]) + if nameRune != separator { + starNameBacktrack += nameRuneLen + patIdx = starPatternBacktrack + nameIdx = starNameBacktrack + startOfSegment = false + continue + } + } + + if doublestarPatternBacktrack >= 0 { + // `**` backtrack, advance `name` past next separator + nameIdx = doublestarNameBacktrack + for nameIdx < nameLen { + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + nameIdx += nameRuneLen + if nameRune == separator { + doublestarNameBacktrack = nameIdx + patIdx = doublestarPatternBacktrack + startOfSegment = true + continue MATCH + } + } + } + + if validate && patIdx < patLen && !ValidatePattern(pattern[patIdx:]) { + return false, ErrBadPattern + } + return false, nil + } + + if nameIdx < nameLen { + // we reached the end of `pattern` before the end of `name` + return false, nil + } + + // we've reached the end of `name`; we've successfully matched if we've also + // reached the end of `pattern`, or if the rest of `pattern` can 
match a + // zero-length string + return isZeroLengthPattern(pattern[patIdx:], separator) +} + +func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) { + // `/**` is a special case - a pattern such as `path/to/a/**` *should* match + // `path/to/a` because `a` might be a directory + if pattern == "" || pattern == "*" || pattern == "**" || pattern == string(separator) + "**" { + return true, nil + } + + if pattern[0] == '{' { + closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\') + if closingIdx == -1 { + // no closing '}' + return false, ErrBadPattern + } + closingIdx += 1 + + patIdx := 1 + for ;; { + commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\') + if commaIdx == -1 { + break + } + commaIdx += patIdx + + ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx] + pattern[closingIdx+1:], separator) + if ret || err != nil { + return + } + + patIdx = commaIdx + 1 + } + return isZeroLengthPattern(pattern[patIdx:closingIdx] + pattern[closingIdx+1:], separator) + } + + // no luck - validate the rest of the pattern + if !ValidatePattern(pattern) { + return false, ErrBadPattern + } + return false, nil +} + +// Finds the index of the first unescaped byte `c`, or negative 1. +func indexUnescapedByte(s string, c byte, allowEscaping bool) int { + l := len(s) + for i := 0; i < l; i++ { + if allowEscaping && s[i] == '\\' { + // skip next byte + i++ + } else if s[i] == c { + return i + } + } + return -1 +} + +// Assuming the byte before the beginning of `s` is an opening `{`, this +// function will find the index of the matching `}`. 
That is, it'll skip over +// any nested `{}` and account for escaping +func indexMatchedClosingAlt(s string, allowEscaping bool) int { + alts := 1 + l := len(s) + for i := 0; i < l; i++ { + if allowEscaping && s[i] == '\\' { + // skip next byte + i++ + } else if s[i] == '{' { + alts++ + } else if s[i] == '}' { + if alts--; alts == 0 { + return i + } + } + } + return -1 +} diff --git a/utils.go b/utils.go new file mode 100644 index 0000000..af7e647 --- /dev/null +++ b/utils.go @@ -0,0 +1,69 @@ +package doublestar + +// SplitPattern is a utility function. Given a pattern, SplitPattern will +// return two strings: the first string is everything up to the last slash +// (`/`) that appears _before_ any unescaped "meta" characters (ie, `*?[{`). +// The second string is everything after that slash. For example, given the +// pattern: +// +// ../../path/to/meta*/** +// ^----------- split here +// +// SplitPattern returns "../../path/to" and "meta*/**". This is useful for +// initializing os.DirFS() to call Glob() because Glob() will silently fail if +// your pattern includes `/./` or `/../`. For example: +// +// base, pattern := SplitPattern("../../path/to/meta*/**") +// fsys := os.DirFS(base) +// matches, err := Glob(fsys, pattern) +// +// If SplitPattern cannot find somewhere to split the pattern (for example, +// `meta*/**`), it will return "." and the unaltered pattern (`meta*/**` in +// this example). +// +// Of course, it is your responsibility to decide if the returned base path is +// "safe" in the context of your application. Perhaps you could use Match() to +// validate against a list of approved base directories? +// +func SplitPattern(p string) (base, pattern string) { + base = "." + pattern = p + + splitIdx := -1 + for i := 0; i < len(p); i++ { + c := p[i] + if c == '\\' { + i++ + } else if c == '/' { + splitIdx = i + } else if c == '*' || c == '?' 
|| c == '[' || c == '{' { + break + } + } + + if splitIdx >= 0 { + return p[:splitIdx], p[splitIdx+1:] + } + + return +} + +// Finds the next comma, but ignores any commas that appear inside nested `{}`. +// Assumes that each opening bracket has a corresponding closing bracket. +func indexNextAlt(s string, allowEscaping bool) int { + alts := 1 + l := len(s) + for i := 0; i < l; i++ { + if allowEscaping && s[i] == '\\' { + // skip next byte + i++ + } else if s[i] == '{' { + alts++ + } else if s[i] == '}' { + alts-- + } else if s[i] == ',' && alts == 1 { + return i + } + } + return -1 +} diff --git a/validate.go b/validate.go new file mode 100644 index 0000000..49be158 --- /dev/null +++ b/validate.go @@ -0,0 +1,61 @@ +package doublestar + +// Validate a pattern. Patterns are validated while they run in Match(), +// PathMatch(), and Glob(), so, you normally wouldn't need to call this. +// However, there are cases where this might be useful: for example, if your +// program allows a user to enter a pattern that you'll run at a later time, +// you might want to validate it. +// +func ValidatePattern(s string) bool { + altDepth := 0 + l := len(s) +VALIDATE: + for i := 0; i < l; i++ { + switch s[i] { + case '\\': + // skip the next byte - return false if there is no next byte + if i++; i >= l { + return false + } + continue + + case '[': + if i++; i >= l { + // class didn't end + return false + } + if s[i] == '^' || s[i] == '!' { + i++ + } + if i >= l || s[i] == ']' { + // class didn't end or empty character class + return false + } + + for ; i < l; i++ { + if s[i] == ']' { + // looks good + continue VALIDATE + } + } + + // class didn't end + return false + + case '{': + altDepth++ + continue + + case '}': + if altDepth == 0 { + // alt end without a corresponding start + return false + } + altDepth-- + continue + } + } + + // valid as long as all alts are closed + return altDepth == 0 +}