Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support more arguments in FindFilesBase #1188

Merged
merged 14 commits into from
Nov 16, 2024
93 changes: 87 additions & 6 deletions pyinfra/facts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@

from typing_extensions import Literal, NotRequired, TypedDict

from pyinfra.api import StringCommand
from pyinfra.api.command import QuoteString, make_formatted_string_command
from pyinfra.api.facts import FactBase
from pyinfra.api.util import try_int
from pyinfra.facts.util.units import parse_size

LINUX_STAT_COMMAND = "stat -c 'user=%U group=%G mode=%A atime=%X mtime=%Y ctime=%Z size=%s %N'"
BSD_STAT_COMMAND = "stat -f 'user=%Su group=%Sg mode=%Sp atime=%a mtime=%m ctime=%c size=%z %N%SY'"
Expand Down Expand Up @@ -322,12 +324,91 @@ class FindFilesBase(FactBase):
def process(self, output):
    """Return the raw command output unchanged."""
    return output

def command(self, path, quote_path=True):
    """
    Build the find command listing entries of this fact's type under path.

    @param path: the path to start the search from
    @param quote_path: if the path should be wrapped in QuoteString
    @return: the formatted find command, with "|| true" appended
    """
    search_root = path
    if quote_path:
        search_root = QuoteString(path)

    return make_formatted_string_command(
        "find {0} -type {type_flag} || true",
        search_root,
        type_flag=self.type_flag,
    )
def command(
    self,
    path: str,
    size: Optional[str | int] = None,
    min_size: Optional[str | int] = None,
    max_size: Optional[str | int] = None,
    maxdepth: Optional[int] = None,
    fname: Optional[str] = None,
    iname: Optional[str] = None,
    regex: Optional[str] = None,
    args: Optional[List[str]] = None,
    quote_path=True,
):
    """
    Build the find command for this fact's type_flag.

    An option given explicitly in args takes precedence: the matching
    keyword argument is then ignored so the flag is not emitted twice.

    @param path: the path to start the search from
    @param size: exact size in bytes or human-readable format.
        GB means 1e9 bytes, GiB means 2^30 bytes
    @param min_size: minimum size in bytes or human-readable format
    @param max_size: maximum size in bytes or human-readable format
    @param maxdepth: maximum depth to descend to
    @param fname: True if the last component of the pathname being examined matches pattern.
        Special shell pattern matching characters (“[”, “]”, “*”, and “?”)
        may be used as part of pattern.
        These characters may be matched explicitly
        by escaping them with a backslash (“\\”).

    @param iname: Like -name, but the match is case insensitive.
    @param regex: True if the whole path of the file matches pattern using regular expression.
    @param args: additional arguments to pass to find, appended verbatim (not quoted)
    @param quote_path: if the path (and name/regex patterns) should be quoted
    @return: a StringCommand running find, with "|| true" appended
    """
    if args is None:
        args = []

    def maybe_quote(value):
        return QuoteString(value) if quote_path else value

    command = [
        "find",
        maybe_quote(path),
        "-type",
        self.type_flag,
    ]

    # Why we need special handling for size:
    # https://unix.stackexchange.com/questions/275925/why-does-find-size-1g-not-find-any-files
    # In short, 'c' means bytes, without it, it means 512-byte blocks.
    # If we use any units other than 'c', it has a weird rounding behavior,
    # and is implementation-specific. So, we always use 'c'
    if "-size" not in args:
        if min_size is not None:
            command.append("-size")
            command.append("+{0}c".format(parse_size(min_size)))

        if max_size is not None:
            command.append("-size")
            command.append("-{0}c".format(parse_size(max_size)))

        if size is not None:
            # Convert like min_size/max_size so "1GB" becomes a byte count
            # instead of being emitted as the invalid "1GBc".
            command.append("-size")
            command.append("{0}c".format(parse_size(size)))

    if maxdepth is not None and "-maxdepth" not in args:
        command.append("-maxdepth")
        command.append("{0}".format(maxdepth))

    # The find flag is "-name"; "fname" is only the keyword argument's name.
    if fname is not None and "-name" not in args:
        command.append("-name")
        command.append(maybe_quote(fname))

    if iname is not None and "-iname" not in args:
        command.append("-iname")
        command.append(maybe_quote(iname))

    if regex is not None and "-regex" not in args:
        command.append("-regex")
        command.append(maybe_quote(regex))

    # Caller-supplied extra arguments go last, before the "|| true" guard.
    command.extend(args)

    command.append("||")
    command.append("true")

    return StringCommand(*command)


class FindFiles(FindFilesBase):
Expand Down
30 changes: 30 additions & 0 deletions pyinfra/facts/util/units.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# from https://stackoverflow.com/a/60708339, but with a few modifications
from __future__ import annotations # for | in type hints

import re

# Multipliers in bytes, keyed by upper-cased unit suffix.
# Decimal units (KB, MB, ...) are powers of 1000; binary units (KIB, ...) powers of 1024.
units = {
    "B": 1,
    "KB": 10**3,
    "MB": 10**6,
    "GB": 10**9,
    "TB": 10**12,
    "KIB": 2**10,
    "MIB": 2**20,
    "GIB": 2**30,
    "TIB": 2**40,
}

# Optional surrounding whitespace, a number, then an optional unit suffix.
# Applied to the upper-cased input, so the unit alternatives are upper case
# and correspond one-to-one with the keys of ``units``.
_SIZE_PATTERN = re.compile(r"\s*(.+?)\s*((?:[KMGT]I?)?B)?\s*")


def parse_human_readable_size(size: str) -> int:
    """
    Convert a human-readable size such as "10.43 KB" or "11gib" to bytes.

    The unit is case-insensitive and may be separated from the number by
    whitespace; leading and trailing whitespace is ignored. A string with
    no unit is interpreted as a number of bytes.

    @param size: the size string to parse
    @return: the size in bytes, truncated towards zero
    @raise ValueError: if the string is not a number optionally followed by a known unit
    """
    match = _SIZE_PATTERN.fullmatch(size.upper())
    if match is None:
        raise ValueError("invalid size: {0!r}".format(size))

    number, unit = match.groups()
    try:
        value = float(number)
    except ValueError:
        # Covers unknown suffixes too: they end up in the number part.
        raise ValueError("invalid size: {0!r}".format(size)) from None

    return int(value * units[unit or "B"])


def parse_size(size: str | int) -> int:
if isinstance(size, int):
return size
return parse_human_readable_size(size)
2 changes: 1 addition & 1 deletion scripts/dev-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
set -euo pipefail

echo "Execute pytest..."
pytest
# Quote "$@" so each forwarded argument reaches pytest as one word,
# without word splitting or glob expansion.
pytest "$@"

echo "Tests complete!"
4 changes: 3 additions & 1 deletion tests/facts/files.FindDirectories/directories.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,7 @@
"output": [
"anotherdir"
],
"fact": ["anotherdir"]
"fact": [
"anotherdir"
]
}
4 changes: 3 additions & 1 deletion tests/facts/files.FindFiles/files.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,7 @@
"output": [
"myfile"
],
"fact": ["myfile"]
"fact": [
"myfile"
]
}
13 changes: 13 additions & 0 deletions tests/facts/files.FindFiles/files_with_name.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"arg": {
"path": "mydir",
"fname": "myfile"
},
"command": "find mydir -type f -name myfile || true",
"output": [
"myfile"
],
"fact": [
"myfile"
]
}
13 changes: 13 additions & 0 deletions tests/facts/files.FindFiles/files_with_name_wildcard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"arg": {
"path": "mydir",
"fname": "myfile*"
},
"command": "find mydir -type f -name 'myfile*' || true",
"output": [
"myfile"
],
"fact": [
"myfile"
]
}
4 changes: 3 additions & 1 deletion tests/facts/files.FindLinks/links.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,7 @@
"output": [
"mylink"
],
"fact": ["mylink"]
"fact": [
"mylink"
]
}
2 changes: 1 addition & 1 deletion tests/operations/files.sync/sync_delete_posix.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"path=/home/somedir/underthat": null
},
"files.FindFiles": {
"path=/home/somedir, quote_path=True": [
"args=None, fname=None, iname=None, max_size=None, maxdepth=None, min_size=None, path=/home/somedir, quote_path=True, regex=None, size=None": [
"/home/somedir/deleteme.txt",
"/home/somedir/nodelete.pyc"
],
Expand Down
2 changes: 1 addition & 1 deletion tests/operations/files.sync/sync_delete_windows.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
"path=/home/somedir/underthat/evendeeper/a-very-deep-file.txt": null
},
"files.FindFiles": {
"path=/home/somedir, quote_path=True": [
"args=None, fname=None, iname=None, max_size=None, maxdepth=None, min_size=None, path=/home/somedir, quote_path=True, regex=None, size=None": [
"/home/somedir/deleteme.txt",
"/home/somedir/nodelete.pyc"
],
Expand Down
2 changes: 1 addition & 1 deletion tests/operations/sysvinit.service/disabled.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"nginx": true
},
"files.FindLinks": {
"path=/etc/rc*.d/S*nginx, quote_path=False": [
"args=None, fname=None, iname=None, max_size=None, maxdepth=None, min_size=None, path=/etc/rc*.d/S*nginx, quote_path=False, regex=None, size=None": [
"somelink"
]
}
Expand Down
2 changes: 1 addition & 1 deletion tests/operations/sysvinit.service/enabled_chkconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"nginx": true
},
"files.FindLinks": {
"path=/etc/rc*.d/S*nginx, quote_path=False": []
"args=None, fname=None, iname=None, max_size=None, maxdepth=None, min_size=None, path=/etc/rc*.d/S*nginx, quote_path=False, regex=None, size=None": []
},
"server.LinuxDistribution": {
"name": "CentOS"
Expand Down
2 changes: 1 addition & 1 deletion tests/operations/sysvinit.service/enabled_rc-update.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"nginx": true
},
"files.FindLinks": {
"path=/etc/rc*.d/S*nginx, quote_path=False": []
"args=None, fname=None, iname=None, max_size=None, maxdepth=None, min_size=None, path=/etc/rc*.d/S*nginx, quote_path=False, regex=None, size=None": []
},
"server.LinuxDistribution": {
"name": "Gentoo"
Expand Down
2 changes: 1 addition & 1 deletion tests/operations/sysvinit.service/enabled_update-rc.d.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"nginx": true
},
"files.FindLinks": {
"path=/etc/rc*.d/S*nginx, quote_path=False": []
"args=None, fname=None, iname=None, max_size=None, maxdepth=None, min_size=None, path=/etc/rc*.d/S*nginx, quote_path=False, regex=None, size=None": []
},
"server.LinuxDistribution": {
"name": "Ubuntu"
Expand Down
3 changes: 3 additions & 0 deletions tests/test_facts.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

def _make_command(command_attribute, args):
if callable(command_attribute):
if isinstance(args, dict):
return command_attribute(**args)

if not isinstance(args, list):
args = [args]

Expand Down
30 changes: 30 additions & 0 deletions tests/test_units.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from pyinfra.facts.util.units import parse_human_readable_size


def test_parse_human_readable_size():
    """Check every supported spelling of a size against its byte count."""
    # Expected byte count -> spellings that must all parse to it.
    # Fractional results are truncated, e.g. 10.43 KiB = 10680.32 -> 10680.
    cases = {
        1024: ["1024b"],
        10430: ["10.43 KB", "10.43KB", "10.43 kb", "10.43kb"],
        11 * 10**9: ["11 GB", "11GB", "11 gb", "11gb"],
        343100000: ["343.1 MB", "343.1MB", "343.1 mb", "343.1mb"],
        1024 * 2**10: ["1024Kib"],
        10680: ["10.43 KiB", "10.43KiB", "10.43 kib"],
        11 * 2**30: ["11 GiB", "11GiB", "11 gib"],
        359766425: ["343.1 MiB", "343.1MiB"],
    }
    for expected, spellings in cases.items():
        for spelling in spellings:
            assert parse_human_readable_size(spelling) == expected, spelling
Loading