Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add filewalks #32

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
nimcache/
nimblecache/
htmldocs/

/build
126 changes: 126 additions & 0 deletions src/fusion/filewalks.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#[
## Design rationale
* an intermediate `WalkOpt` is used to allow easier proc forwarding
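* a `yieldFilter`, regex match, etc. isn't needed because the caller can filter
  yielded entries at the call site without loss of generality. `follow` is
  different: it decides which directories are descended into, which the caller
  cannot do after the fact. Offering only `follow` simplifies the API.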

## Future work:
* provide a way to do error reporting, which is tricky because iteration cannot be resumed
]#

import std/[os, algorithm, deques, macros]

type
  PathEntrySub* = object
    ## Immediate child of a directory, as handed to `SortCmpCallback`.
    kind*: PathComponent
    path*: string
      ## path relative to the directory being listed
  PathEntry* = object
    ## Item yielded by `walkPathsOpt` (and passed to `FollowCallback`).
    kind*: PathComponent
    path*: string
      ## absolute or relative path with respect to walked dir
    depth*: int
      ## depth with respect to WalkOpt.dir (which is at depth 0)
    epilogue*: bool
      ## true when this is the 2nd yield of a dir (see
      ## `WalkOpt.includeEpilogue`), false otherwise
  WalkMode* = enum
    dfs ## depth first search
    bfs ## breadth first search
  # Decides whether the directory `entry` should be descended into.
  FollowCallback* = proc(entry: PathEntry): bool
  # Comparison used to order the immediate children of a directory; follows
  # the usual `cmp` convention expected by `algorithm.sort`.
  SortCmpCallback* = proc(x, y: PathEntrySub): int
  WalkOpt* = object
    ## Options controlling a walk; build one with `initWalkOpt`.
    dir*: string ## root of walk
    relative: bool
      ## when true, paths are returned relative to `dir`. Otherwise they start
      ## with `dir`
    checkDir: bool
      ## if true, raises `OSError` when `dir` can't be listed. Deeper
      ## directories do not cause `OSError`, and currently no error reporting
      ## is done for those.
    walkMode: WalkMode ## controls how paths are returned
    includeRoot: bool ## whether to include root `dir`
    includeEpilogue: bool
      ## when false, yields: someDir, <children of someDir>
      ## when true, yields: someDir, <children of someDir>, someDir: each dir is
      ## yielded a 2nd time. This is useful in applications that aggregate data
      ## over dirs.
    followSymlinks: bool
      ## whether to descend into symlinks that point to directories
    follow: FollowCallback
      ## if not `nil`, a directory `entry` is descended into only if
      ## `follow(entry) == true`; the entry itself is yielded either way.
      ## The callback always receives the path relative to `dir`, regardless
      ## of `relative`.
    sortCmp: SortCmpCallback
      ## if not `nil`, immediate children of a dir are sorted using `sortCmp`

macro ctor(obj: untyped, a: varargs[untyped]): untyped =
  ##[
  Generates an object constructor call from a list of fields.

  Each argument in `a` is used both as the field name and as the field value,
  so `Foo.ctor(a, b)` expands to `Foo(a: a, b: b)`. This is meant for the case
  where local variables (or parameters) are named after the fields they fill.
  ]##
  # xxx expose in some `fusion/macros`
  runnableExamples:
    type Foo = object
      a, b: int
    let (a, b) = (1, 2)
    doAssert Foo.ctor(a,b) == Foo(a: a, b: b)
  # `obj(...)` constructor node, then one `name: name` pair per argument.
  result = nnkObjConstr.newTree(obj)
  for ai in a: result.add nnkExprColonExpr.newTree(ai, ai)

proc initWalkOpt*(
    dir: string, relative = false, checkDir = true, walkMode = dfs,
    includeRoot = false, includeEpilogue = false, followSymlinks = false,
    follow: FollowCallback = nil, sortCmp: SortCmpCallback = nil): WalkOpt =
  ## Builds a `WalkOpt` in which every field is set from the argument of the
  ## same name.
  result = WalkOpt(
    dir: dir,
    relative: relative,
    checkDir: checkDir,
    walkMode: walkMode,
    includeRoot: includeRoot,
    includeEpilogue: includeEpilogue,
    followSymlinks: followSymlinks,
    follow: follow,
    sortCmp: sortCmp)

iterator walkPathsOpt*(opt: WalkOpt): PathEntry =
  ##[
  Recursively walks `opt.dir`, yielding one `PathEntry` per path found.
  This is more flexible than `os.walkDirRec`.

  * the root itself is yielded only when `opt.includeRoot` is true
  * a directory is yielded before the decision to descend into it is taken, so
    `opt.follow` prunes the children of a dir, not the dir itself
  * `opt.follow` receives the path relative to `opt.dir`, whatever
    `opt.relative` is

  Raises `OSError` when `opt.checkDir` is true and `opt.dir` is not an existing
  directory; the first directory listing is also done with `checkDir` enabled.
  Deeper directories are listed with `checkDir = false`.
  ]##
  runnableExamples:
    import os,sugar
    if false: # see also `tfilewalks.nim`
      # list hidden files of depth <= 2 + 1 in your home.
      for e in walkPaths(getHomeDir(), follow = a=>a.path.isHidden and a.depth <= 2):
        if e.kind in {pcFile, pcLinkToFile}: echo e.path

  var entry = PathEntry(depth: 0, path: ".")
  when nimvm: entry.kind = pcDir
  else: # xxx support `symlinkExists` in nimvm
    entry.kind = if symlinkExists(opt.dir): pcLinkToDir else: pcDir
  # Pending entries; consumed from the back in dfs mode and from the front in
  # bfs mode.
  var stack = initDeque[PathEntry]()

  var checkDir = opt.checkDir
  if dirExists(opt.dir):
    stack.addLast entry
  elif checkDir:
    raise newException(OSError, "invalid root dir: " & opt.dir)

  # Loop invariants, computed once.
  let isSort = opt.sortCmp != nil
  let isDfs = opt.walkMode == dfs

  var dirsLevel: seq[PathEntrySub] # scratch buffer reused for each sorted dir
  while stack.len > 0:
    let current = if isDfs: stack.popLast() else: stack.popFirst()
    entry.epilogue = current.epilogue
    entry.depth = current.depth
    entry.kind = current.kind
    entry.path = if opt.relative: current.path else: opt.dir / current.path
    normalizePath(entry.path) # pending https://github.com/timotheecour/Nim/issues/343

    if opt.includeRoot or current.depth > 0:
      yield entry # single `yield` to avoid code bloat

    # Only directories (and, if requested, symlinks to directories) are
    # expanded; an epilogue entry was already expanded on its 1st visit.
    # NOTE(review): this also applies to the root: outside of nimvm, when
    # `opt.dir` is itself a symlink and `followSymlinks` is false, nothing
    # below it is walked -- confirm this is intended.
    let isWalkableDir = current.kind == pcDir or
      (current.kind == pcLinkToDir and opt.followSymlinks)
    if isWalkableDir and not current.epilogue:
      if opt.follow == nil or opt.follow(current):
        if isSort:
          dirsLevel.setLen 0
        if opt.includeEpilogue:
          # Queued before the children, so that in dfs mode it is popped after
          # all of them.
          stack.addLast PathEntry(depth: current.depth, path: current.path, kind: current.kind, epilogue: true)
        # `checkDir` is still needed here in first iteration because things could
        # fail for reasons other than `not dirExists`.
        for k, p in walkDir(opt.dir / current.path, relative = true, checkDir = checkDir):
          if isSort:
            dirsLevel.add PathEntrySub(kind: k, path: p)
          else:
            stack.addLast PathEntry(depth: current.depth + 1, path: current.path / p, kind: k)
        checkDir = false
          # We only check top-level dir, otherwise if a subdir is invalid (eg. wrong
          # permissions), it'll abort iteration and there would be no way to resume iteration.
        if isSort:
          sort(dirsLevel, opt.sortCmp)
          # In dfs mode entries are popped from the back, so children are
          # pushed in reverse to come out in sorted order.
          for i in 0..<dirsLevel.len:
            let j = if isDfs: dirsLevel.len-1-i else: i
            let ai = dirsLevel[j]
            stack.addLast PathEntry(depth: current.depth + 1, path: current.path / ai.path, kind: ai.kind)

template walkPaths*(args: varargs[untyped]): untyped =
  ## Convenience wrapper around `walkPathsOpt`.
  ##
  ## All arguments are forwarded unchanged to `initWalkOpt`, and the resulting
  ## `WalkOpt` is passed to `walkPathsOpt`.
  walkPathsOpt(initWalkOpt(args))
1 change: 1 addition & 0 deletions tests/config.nims
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Lets tests import the library modules under `src` (e.g. `fusion/filewalks`).
switch("path", "$projectDir/../src")
# Lets tests import the helper modules under `lib` (e.g. `tfusion/paths`).
switch("path", "$projectDir/lib")
# Sets the `styleCheck` option to `hint`.
switch("styleCheck", "hint")
15 changes: 15 additions & 0 deletions tests/lib/tfusion/osutils.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import std/[os,strutils]

proc genTestPaths*(dir: string, paths: openArray[string]) =
  ## Generates a filesystem rooted under `dir` from given relative `paths`.
  ##
  ## * `paths` ending in `/` are treated as directories and created with
  ##   `createDir`
  ## * any other entry is written as an empty file, after creating its parent
  ##   directory
  ##
  ## Each entry must be non-empty and relative; this is enforced with
  ## `doAssert`. `paths` accepts any `openArray` (seq, array, slice).
  # xxx use this in tos.nim
  for a in paths:
    doAssert not a.isAbsolute
    doAssert a.len > 0
    let a2 = dir / a
    if a.endsWith("/"):
      createDir(a2)
    else:
      createDir(a2.parentDir)
      writeFile(a2, "")
8 changes: 8 additions & 0 deletions tests/lib/tfusion/paths.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import std/os

const
  # Four levels above this source file.
  fusionRoot* = parentDir(parentDir(parentDir(parentDir(currentSourcePath()))))
  # Directory named `build` directly under `fusionRoot`.
  buildDir* = joinPath(fusionRoot, "build")
  # Nimble file expected directly under `fusionRoot`.
  nimbleFile* = joinPath(fusionRoot, "fusion.nimble")

static:
  # sanity check
  doAssert fileExists(nimbleFile)
73 changes: 73 additions & 0 deletions tests/tfilewalks.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import std/[os,sequtils,sugar]
from tfusion/paths import buildDir

# Root of the generated test filesystem.
const dir = buildDir/"tfilewalks"

# This file runs in two phases:
# * without `-d:fusionTfilewalksTesting` (the `else` branch): generates the
#   test filesystem under `dir`, then re-runs this same file with the define
# * with the define: runs the actual assertions against that filesystem
when defined(fusionTfilewalksTesting):
  import std/[sugar,os,sequtils,algorithm]
  from std/private/globs import nativeToUnixPath
  import fusion/filewalks

  proc processAux[T](a: T): seq[string] =
    ## Paths of the entries in `a`, converted with `nativeToUnixPath`; order
    ## is preserved.
    a.mapIt(it.path.nativeToUnixPath)

  proc process[T](a: T): seq[string] =
    ## Same as `processAux`, but sorted, for walks whose order is unspecified.
    a.processAux.sorted

  proc test() =
    block: # follow
      # filter by pcFile
      doAssert toSeq(walkPaths(dir, follow = a=>a.path.lastPathPart != "d1b", relative = true))
        .filterIt(it.kind == pcFile).process == @["d1/d1a/f2.txt", "d1/d1a/f3", "d1/f1.txt", "f5"]
      # filter by pcDir
      doAssert toSeq(walkPaths(dir, relative = true))
        .filterIt(it.kind == pcDir).process == @["d1", "d1/d1a", "d1/d1a/d1a1", "d1/d1b", "d1/d1b/d1b1", "d2"]

    block: # includeRoot
      doAssert toSeq(walkPaths(dir, relative = true, includeRoot = true))
        .filterIt(it.kind == pcDir).process == @[".", "d1", "d1/d1a", "d1/d1a/d1a1", "d1/d1b", "d1/d1b/d1b1", "d2"]

    block: # checkDir
      doAssertRaises(OSError): discard toSeq(walkPaths("nonexistant"))
      # The root must be a directory: use the generated file `f5` under `dir`.
      # A bare "f5" would be resolved against the current directory and only
      # repeat the nonexistent-path case above.
      doAssertRaises(OSError): discard toSeq(walkPaths(dir / "f5"))
      doAssert toSeq(walkPaths("nonexistant", checkDir = false)) == @[]

    # sortCmp
    proc mySort(a, b: PathEntrySub): int = cmp(a.path, b.path)
    doAssert toSeq(walkPaths(dir, relative = true, sortCmp = mySort)).processAux ==
      @["d1", "d1/d1a", "d1/d1a/d1a1", "d1/d1a/f2.txt", "d1/d1a/f3", "d1/d1b", "d1/d1b/d1b1", "d1/d1b/d1b1/f4", "d1/f1.txt", "d2", "f5"]

    # bfs
    doAssert toSeq(walkPaths(dir, relative = true, sortCmp = mySort, walkMode = bfs)).processAux ==
      @["d1", "d2", "f5", "d1/d1a", "d1/d1b", "d1/f1.txt", "d1/d1a/d1a1", "d1/d1a/f2.txt", "d1/d1a/f3", "d1/d1b/d1b1", "d1/d1b/d1b1/f4"]

    # includeEpilogue
    doAssert toSeq(walkPaths(dir, relative = true, sortCmp = mySort, includeEpilogue = true, includeRoot = true)).processAux ==
      @[".", "d1", "d1/d1a", "d1/d1a/d1a1", "d1/d1a/d1a1", "d1/d1a/f2.txt", "d1/d1a/f3", "d1/d1a", "d1/d1b", "d1/d1b/d1b1", "d1/d1b/d1b1/f4", "d1/d1b/d1b1", "d1/d1b", "d1/f1.txt", "d1", "d2", "d2", "f5", "."]

  when (NimMajor, NimMinor, NimPatch) >= (1, 5, 1):
    static: test()
  test()
else:
  from tfusion/osutils import genTestPaths
  import std/[strformat,strutils]
  proc main() =
    ## Generates the test filesystem, runs the assertions in a child
    ## compilation of this file, and removes the filesystem afterwards.
    defer: removeDir(dir)
    let paths = """
d1/f1.txt
d1/d1a/f2.txt
d1/d1a/f3
d1/d1a/d1a1/
d1/d1b/d1b1/f4
d2/
f5
""".splitLines.filter(a=>a.len>0)
    genTestPaths(dir, paths)
    const nim = getCurrentCompilerExe()
    const input = currentSourcePath()
    # Quote both paths so the command still works when they contain spaces.
    let cmd = &"{quoteShell(nim)} r -d:fusionTfilewalksTesting {quoteShell(input)}"
    when (NimMajor, NimMinor, NimPatch) >= (1, 4, 0):
      # for `nativeToUnixPath`
      let status = execShellCmd(cmd)
      doAssert status == 0
  main()