From 37259d3e8c90d71c016c2528d57d4ec1899d8507 Mon Sep 17 00:00:00 2001 From: patrykgruszka Date: Thu, 29 Feb 2024 22:49:53 +0100 Subject: [PATCH] filter messages by date --- README.md | 6 +++++- imapfetch.py | 26 +++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 4 deletions(-) mode change 100755 => 100644 imapfetch.py diff --git a/README.md b/README.md index 5ed5972..02bdf23 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Configure your accounts using the provided [configuration sample](assets/setting Use `--help` to see a list of possible options: - imapfetch [-h] [--full] [--list] [--verbose] config [section ...] + imapfetch [-h] [--full] [--list] [--verbose] [--start-date START_DATE] [--end-date END_DATE] config [section ...] The configuration file is passed as the first and only required positional argument. Any further positional arguments are section names from the configuration file, which will be run exclusively; for example if you want to archive only a single account at a time. @@ -39,6 +39,10 @@ The configuration file is passed as the first and only required positional argum - `--verbose`: Show more verbose logging. Can be passed multiple times. +- `--start-date START_DATE`: Start date for filtering messages (YYYY-MM-DD) + +- `--end-date END_DATE`: End date for filtering messages (YYYY-MM-DD) + ## CONFIGURATION The available configuration options are mostly explained in the provided sample. 
diff --git a/imapfetch.py b/imapfetch.py old mode 100755 new mode 100644 index 57fb978..ca8a1c2 --- a/imapfetch.py +++ b/imapfetch.py @@ -10,6 +10,8 @@ import contextlib, functools, urllib.parse import mailbox, email.policy import imapclient +import datetime + # register a signal handler for clean(er) exits def interrupt(sig, frame): @@ -58,8 +60,20 @@ def cd(self, folder): return self.client.select_folder(folder, readonly=True) # get new mail uids in current folder, starting with uid start - def mails(self, start=1): - return self.client.search("UID {}:*". format(start)) + def mails(self, start=1, start_date=None, end_date=None): + # build search criteria + criteria = ["UID {}:*".format(start)] + if start_date: + # IMAP date format is "01-Jan-2000" + start_date_str = start_date.strftime('%d-%b-%Y') + criteria.append('SINCE {}'.format(start_date_str)) + if end_date: + end_date_str = end_date.strftime('%d-%b-%Y') + criteria.append('BEFORE {}'.format(end_date_str)) + + # join criteria with space + search_criteria = ' '.join(criteria) + return self.client.search(search_criteria) # chunk sizes for partial fetches, first flight and remaining chunks # a sufficiently large firstflight chunk can fetch messages in one go @@ -288,8 +302,14 @@ def commandline(): parser.add_argument("--full", "-f", help="do full backups", action="store_true") parser.add_argument("--list", "-l", help="only list folders", action="store_true") parser.add_argument("--verbose", "-v", help="increase logging verbosity", action="count", default=1) + parser.add_argument("--start-date", dest="start_date", help="start date for filtering messages (YYYY-MM-DD)") + parser.add_argument("--end-date", dest="end_date", help="end date for filtering messages (YYYY-MM-DD)") args = parser.parse_args() + # calculate start_date and end_date if provided + start_date = datetime.datetime.strptime(args.start_date, "%Y-%m-%d").date() if args.start_date else None + end_date = datetime.datetime.strptime(args.end_date, 
"%Y-%m-%d").date() if args.end_date else None + # configure logging format and verbosity level = [ERROR, WARNING, INFO, VERBOSE, DEBUG] level = level[min(len(level) - 1, args.verbose)] @@ -357,7 +377,7 @@ def checkskip(rules, folder): log(INFO, "starting at uid 1") # iterate over all uids >= highest - for uid in mailserver.mails(1 if args.full else highest): + for uid in mailserver.mails(1 if args.full else highest, start_date=start_date, end_date=end_date): header, size, generator = mailserver.message(uid) # check if the email is stored already