# Fallback stand-ins; in the script these are defined in the ``except ImportError:``
# branch that follows the attempt to import the real classes from posttroll.
class Publish(object):
    """Dummy publish class to handle the case when Posttroll is not being used or not available."""

    def __init__(self, *args, **kwargs):
        """Accept and ignore the arguments given to the real publisher.

        The script instantiates the publisher as ``Publish("remover")``. Without
        this initializer the dummy class would raise ``TypeError`` on that call.
        """
        del args, kwargs

    def __enter__(self):
        """Enter the dummy publisher."""
        return self

    def __exit__(self, etype, value, traceback):
        """Exit the dummy publisher - exceptions are not suppressed."""
        pass

    def send(self, msg):
        """Fake send message - however here nothing is being sent."""
        pass


def Message(*args, **kwargs):
    """Handle messaging in case posttroll is not available.

    All arguments are discarded and ``None`` is returned.
    """
    del args, kwargs
def setup_logger(args):
    """Set up logging.

    The log records go to a rotating file when ``args.logfile`` is set and to
    a stream handler otherwise. The level of the root logger follows the
    command line flags: DEBUG for ``args.verbose``, ERROR for ``args.quiet``
    and INFO when neither is given.
    """
    msgformat = '[%(asctime)-15s %(levelname)-8s] %(message)s'

    if args.logfile:
        handler = logging.handlers.RotatingFileHandler(
            args.logfile, maxBytes=1000000, backupCount=10)
    else:
        handler = logging.StreamHandler()

    # The handler lets everything through, the filtering is done by the logger level.
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter(msgformat))

    if args.verbose:
        level = logging.DEBUG
    elif args.quiet:
        level = logging.ERROR
    else:
        level = logging.INFO

    logging.basicConfig(level=level, handlers=[handler])
args.config_item: - if config_item not in conf.sections(): - LOGGER.error("No section named %s in %s", - config_item, args.configuration_file) - else: - config_items.append(config_item) - else: - config_items = conf.sections() - - return config_items - - -def remove_file(filename, pub): - try: - if os.path.isdir(filename): - if not os.listdir(filename): - os.rmdir(filename) - else: - LOGGER.info("%s not empty.", filename) - else: - os.remove(filename) - msg = Message("deletion", "del", {"uri": filename}) - pub.send(str(msg)) - LOGGER.debug("Removed %s", filename) - except (IOError, OSError) as err: - LOGGER.warning("Can't remove %s: %s", filename, - str(err)) - return False - return True - - -def clean_dir(pub, ref_time, pathname, is_dry_run): - section_files = 0 - section_size = 0 - LOGGER.info("Cleaning %s", pathname) - flist = glob(pathname) - for filename in flist: - if not os.path.exists(filename): - continue - try: - stat = os.lstat(filename) - except OSError: - LOGGER.warning("Couldn't lstat path=%s", str(filename)) - continue - - if datetime.fromtimestamp(stat.st_ctime) < ref_time: - was_removed = False - if not is_dry_run: - was_removed = remove_file(filename, pub) - else: - LOGGER.debug("Would remove %s", filename) - if was_removed: - section_files += 1 - section_size += stat.st_size - - return (section_size, section_files) - - -def clean_section(pub, section, conf, is_dry_run=True): - section_files = 0 - section_size = 0 - info = dict(conf.items(section)) - base_dir = info.get("base_dir", "") - if not os.path.exists(base_dir): - LOGGER.warning("Path %s missing, skipping section %s", - base_dir, section) - return (section_size, section_files) - LOGGER.info("Cleaning in %s", base_dir) - templates = (item.strip() for item in info["templates"].split(",")) - kws = {} - for key in ["days", "hours", "minutes", "seconds"]: - try: - kws[key] = int(info[key]) - except KeyError: - pass - ref_time = datetime.utcnow() - timedelta(**kws) - - for template in templates: 
LOGGER = logging.getLogger(__name__)

# Maps the ``filetime_checker_type`` setting onto the ``os.stat_result`` field to compare.
_STAT_TIME_ATTRIBUTES = {'ctime': 'st_ctime',
                         'mtime': 'st_mtime'}


def remove_file(filename, pub):
    """Remove a file given its filename, and publish when removed.

    Removing an empty directory is not published. A directory which is not
    empty is left untouched.

    Returns True if the file or directory was actually removed, otherwise
    False.
    """
    try:
        if os.path.isdir(filename):
            if os.listdir(filename):
                LOGGER.info("%s not empty.", filename)
                # Nothing was removed, so the callers must not count it.
                return False
            os.rmdir(filename)
        else:
            os.remove(filename)
            msg = Message("deletion", "del", {"uri": filename})
            pub.send(str(msg))
            LOGGER.debug("Removed %s", filename)
    except OSError as err:
        LOGGER.warning("Can't remove %s: %s", filename,
                       str(err))
        return False
    return True


def _remove_empty_dir(dirpath, is_dry_run):
    """Remove an empty directory, or only log the intention on a dry run."""
    if is_dry_run:
        LOGGER.info("Would remove empty directory: %s", dirpath)
        return

    try:
        os.rmdir(dirpath)
    except OSError:
        LOGGER.debug("Was trying to remove empty directory, but failed. Should not have come here!")


def clean_dir(pub, ref_time, pathname_template, is_dry_run, **kwargs):
    """Clean directory of files given a path name and a time threshold.

    Only files older than a given time threshold are removed/cleaned.

    Supported keyword arguments:

    filetime_checker_type: 'ctime' (default) or 'mtime', the file time stamp
        which is compared with *ref_time*. Any other value raises ValueError.
    recursive: when true every path matching *pathname_template* is walked and
        the files in all directories below it are cleaned. Empty directories
        found on the way are removed as well.

    Returns a tuple with the accumulated size and the number of the removed
    files.
    """
    # Fall back to the creation/change time when nothing (or None) is given.
    filetime_checker_type = kwargs.get('filetime_checker_type') or 'ctime'
    try:
        stat_time_checker = _STAT_TIME_ATTRIBUTES[filetime_checker_type]
    except KeyError:
        raise ValueError("Unknown filetime_checker_type %r, should be one of %s" %
                         (filetime_checker_type, sorted(_STAT_TIME_ATTRIBUTES))) from None
    recursive = kwargs.get("recursive")

    LOGGER.info("Cleaning under %s", pathname_template)

    if not recursive:
        filepaths = glob(pathname_template)
        return clean_files_and_dirs(pub, filepaths, ref_time, stat_time_checker, is_dry_run)

    section_files = 0
    section_size = 0

    for pathname in glob(pathname_template):
        # The walk is top-down: a directory which gets empty because its files are
        # removed here is not removed before the next time the cleaning is run.
        for dirpath, dirnames, filenames in os.walk(pathname):
            # Use the listing from os.walk instead of a '*' glob, as the glob
            # does not see hidden files.
            if not dirnames and not filenames:
                _remove_empty_dir(dirpath, is_dry_run)
                continue

            filepaths = [os.path.join(dirpath, fname) for fname in filenames]

            s_size, s_files = clean_files_and_dirs(pub, filepaths, ref_time, stat_time_checker, is_dry_run)
            section_files += s_files
            section_size += s_size

    return (section_size, section_files)


def clean_files_and_dirs(pub, filepaths, ref_time, stat_time_checker, is_dry_run):
    """From a list of file paths and a reference time clean files and directories.

    *stat_time_checker* is the name of the ``os.stat_result`` attribute holding
    the time stamp to check. The time stamp is converted with
    ``datetime.fromtimestamp``, which gives naive local time, so *ref_time* is
    expected to be a naive datetime in local time.

    On a dry run nothing is removed, the candidates are only logged.

    Returns a tuple with the accumulated size and the number of the removed
    paths.
    """
    section_files = 0
    section_size = 0
    for filepath in filepaths:
        if not os.path.exists(filepath):
            continue
        try:
            stat = os.lstat(filepath)
        except OSError:
            LOGGER.warning("Couldn't lstat path=%s", str(filepath))
            continue

        if datetime.fromtimestamp(getattr(stat, stat_time_checker)) >= ref_time:
            # Not old enough.
            continue

        if is_dry_run:
            LOGGER.info("Would remove %s", filepath)
            continue

        if remove_file(filepath, pub):
            section_files += 1
            section_size += stat.st_size

    return (section_size, section_files)


def clean_section(pub, section, conf, is_dry_run=True):
    """Do the files cleaning given a list of directory paths and time thresholds.

    This calls the clean_dir function in this module.

    The following options of the configuration *section* are used:

    base_dir: the directory under which to clean. If it does not exist the
        section is skipped.
    templates: comma separated list of glob patterns, relative to base_dir.
    days, hours, minutes, seconds: optional integers, added up they give the
        age a file needs to exceed in order to be removed.
    filetime_checker_type: optional, 'ctime' (default) or 'mtime'.
    recursive: optional, the cleaning is recursive only if set to 'true'.

    Returns a tuple with the accumulated size and the number of the removed
    files.
    """
    section_files = 0
    section_size = 0
    info = dict(conf.items(section))
    recursive = info.get('recursive') == 'true'

    base_dir = info.get("base_dir", "")
    if not os.path.exists(base_dir):
        LOGGER.warning("Path %s missing, skipping section %s", base_dir, section)
        return (section_size, section_files)
    LOGGER.info("Cleaning in %s", base_dir)

    templates = (item.strip() for item in info["templates"].split(","))
    kws = {}
    for key in ["days", "hours", "minutes", "seconds"]:
        try:
            kws[key] = int(info[key])
        except KeyError:
            pass

    # The file times are converted to naive local time before they are compared,
    # so the reference time has to be local time as well and not UTC.
    ref_time = datetime.now() - timedelta(**kws)
    for template in templates:
        pathname = os.path.join(base_dir, template)
        size, num_files = clean_dir(pub, ref_time, pathname, is_dry_run,
                                    filetime_checker_type=info.get('filetime_checker_type', 'ctime'),
                                    recursive=recursive)
        section_files += num_files
        section_size += size

    return (section_size, section_files)


def get_config_items(args, conf):
    """Get items from ini configuration.

    Returns the section names requested in ``args.config_item`` which exist in
    *conf*. The missing ones are logged as errors and left out. If no section
    is requested all the sections of *conf* are returned.
    """
    if not args.config_item:
        return conf.sections()

    available_sections = conf.sections()
    config_items = []
    for config_item in args.config_item:
        if config_item in available_sections:
            config_items.append(config_item)
        else:
            LOGGER.error("No section named %s in %s",
                         config_item, args.configuration_file)

    return config_items
DUMMY_CONTENT = "some dummy content"

OLD_FILES_TIME = datetime(2023, 5, 25, 12, 0)


class FakePublisher():
    """Fake Publisher class to be used for testing only."""

    def __enter__(self):
        """Enter method."""
        return self

    def __exit__(self, etype, value, traceback):
        """Exit."""
        pass

    def send(self, msg):
        """Fake a send method."""
        pass


@pytest.fixture(params=[OLD_FILES_TIME])
def fake_tree_of_some_files(request, tmp_path) -> list:
    """Create a directory tree of small dummy files below the tmp_path of the test.

    The tree is private to the requesting test, so the outcome of the cleaning
    does not depend on what other tests have left behind. The last file and
    its directory get the (old) time given by the fixture parameter, the
    others are new.
    """
    filepaths = []
    for dirname, filename in (("data0", "dummy1.txt"), ("data1", "dummy2.txt")):
        fn = tmp_path / dirname / filename
        fn.parent.mkdir()
        fn.write_text(DUMMY_CONTENT)
        filepaths.append(fn)

    fn = tmp_path / "data2" / "another_subdir" / "dummy3.txt"
    fn.parent.mkdir(parents=True)
    fn.write_text(DUMMY_CONTENT)

    # Alter the times of the last file and its sub directory
    dtobj = request.param
    atime, mtime = (dtobj.timestamp(), dtobj.timestamp())
    os.utime(fn, times=(atime, mtime))
    os.utime(fn.parent, times=(atime, mtime))
    filepaths.append(fn)

    return filepaths


def test_clean_dir_non_recursive(fake_tree_of_some_files, tmp_path, caplog):
    """Test cleaning a directory for files of a certain age.

    The time checked (ctime) is recent for everything in the tree, so nothing is removed.
    """
    list_of_files_to_clean = fake_tree_of_some_files
    ref_time = OLD_FILES_TIME + timedelta(hours=1)
    kws = {'filetime_checker_type': 'ctime'}
    pathname = str(tmp_path / '*')

    with FakePublisher() as pub, caplog.at_level(logging.INFO):
        res = clean_dir(pub, ref_time, pathname, False, **kws)

    assert f"Cleaning under {pathname}" in caplog.text
    assert res == (0, 0)

    assert list_of_files_to_clean[0].exists()
    assert list_of_files_to_clean[1].exists()
    assert list_of_files_to_clean[2].exists()


def test_clean_dir_recursive_mtime_real(fake_tree_of_some_files, tmp_path, caplog):
    """Test cleaning a directory tree for files of a certain age.

    Here we test using the modification time to determine when the file has been 'created'.
    """
    list_of_files_to_clean = fake_tree_of_some_files
    ref_time = OLD_FILES_TIME + timedelta(hours=1)
    kws = {'filetime_checker_type': 'mtime',
           'recursive': True}
    pathname = str(tmp_path)

    with FakePublisher() as pub, caplog.at_level(logging.DEBUG):
        res = clean_dir(pub, ref_time, pathname, False, **kws)

    section_size, section_files = res

    # Only the one old file of this test's own tree is removed
    assert section_size == len(DUMMY_CONTENT)
    assert section_files == 1

    assert list_of_files_to_clean[0].exists()
    assert list_of_files_to_clean[1].exists()

    removed_file = list_of_files_to_clean[2]
    assert f"Removed {removed_file}" in caplog.text
    assert not removed_file.exists()


def test_clean_dir_recursive_mtime_dryrun(fake_tree_of_some_files, tmp_path, caplog):
    """Test a dry run of cleaning a directory tree for files of a certain age.

    Here we test using the modification time to determine when the file has been 'created'.
    Nothing may be removed, the old file is only reported.
    """
    list_of_files_to_clean = fake_tree_of_some_files
    ref_time = OLD_FILES_TIME + timedelta(hours=1)
    kws = {'filetime_checker_type': 'mtime',
           'recursive': True}
    pathname = str(tmp_path)

    with FakePublisher() as pub, caplog.at_level(logging.INFO):
        res = clean_dir(pub, ref_time, pathname, True, **kws)

    section_size, section_files = res

    assert section_size == 0
    assert section_files == 0
    assert list_of_files_to_clean[0].exists()
    assert list_of_files_to_clean[1].exists()

    removed_file = list_of_files_to_clean[2]
    assert f"Would remove {removed_file}" in caplog.text
    assert removed_file.exists()