Skip to content

Commit

Permalink
Add tags to jobs and a command line arg to select them
Browse files Browse the repository at this point in the history
This allows the user to selectively run jobs based on a tag for use
cases where you might want to run urlwatch on different schedules for
different jobs and other use cases.

Signed-off-by: James Hewitt <[email protected]>
  • Loading branch information
Jamstah committed Feb 16, 2024
1 parent 0bc4abd commit 8b6b90e
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The format mostly follows [Keep a Changelog](http://keepachangelog.com/en/1.0.0/
### Added

- New `enabled` option for all jobs. Set to false to disable a job without needing to remove it or comment it out (Requested in #625 by snowman, contributed in #785 by jamstah)
- Added tags to jobs and the ability to select them at the command line (#789 by jamstah)

### Changed

Expand Down
1 change: 1 addition & 0 deletions docs/source/jobs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ Optional keys for all job types
-------------------------------

- ``name``: Human-readable name/label of the job
- ``tags``: Array of tags that can be used to select which jobs to run (see the ``--tags`` command line option)
- ``filter``: :doc:`filters` (if any) to apply to the output (can be tested with ``--test-filter``)
- ``max_tries``: After this many sequential failed runs, the error will be reported rather than ignored
- ``diff_tool``: Command to a custom tool for generating diff text
Expand Down
6 changes: 5 additions & 1 deletion docs/source/manpage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@ This manpage describes the CLI tool.

positional arguments:
JOB
index of job(s) to run, as numbered according to the --list command.
indexes or tags of job(s) to run, depending on --tags.
If using indexes, they are as numbered according to the --list command.
If none are specified, then all jobs will be run.

optional arguments:
-h, --help
show this help message and exit

--tags
use tags instead of indexes to select jobs to run

--version
show program's version number and exit

Expand Down
17 changes: 14 additions & 3 deletions lib/urlwatch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,13 @@ def __init__(self, args, pkgname, urlwatch_dir, prefix, config, urls, hooks, cac
self.parse_args(args)

def parse_args(self, cmdline_args):

parser = argparse.ArgumentParser(description=urlwatch.__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('joblist', metavar='JOB', type=int, nargs="*", help='index of job(s) to run, as numbered according to the --list command. If none specified, then all jobs will be run.')
parser.add_argument('joblist', metavar='JOB', type=str, nargs="*", help='indexes or tags of job(s) to run, depending on --tags. If using indexes, they are as numbered according to the --list command. If none are specified, then all jobs will be run.')
parser.add_argument('--tags', action='store_true', help='Use tags instead of indexes to select jobs to run')
parser.add_argument('--version', action='version', version='%(prog)s {}'.format(urlwatch.__version__))
parser.add_argument('-v', '--verbose', action='store_true', help='show debug output')

group = parser.add_argument_group('files and directories')
group.add_argument('--urls', metavar='FILE', help='read job list (URLs) from FILE',
default=self.urls)
Expand All @@ -95,17 +96,27 @@ def parse_args(self, cmdline_args):
group.add_argument('--test-diff-filter', metavar='JOB',
help='test diff filter output of job by location or index (needs at least 2 snapshots)')
group.add_argument('--dump-history', metavar='JOB', help='dump historical cached data for a job')

group = parser.add_argument_group('interactive commands ($EDITOR/$VISUAL)')
group.add_argument('--edit', action='store_true', help='edit URL/job list')
group.add_argument('--edit-config', action='store_true', help='edit configuration file')
group.add_argument('--edit-hooks', action='store_true', help='edit hooks script')

group = parser.add_argument_group('miscellaneous')
group.add_argument('--features', action='store_true', help='list supported jobs/filters/reporters')
group.add_argument('--gc-cache', metavar='RETAIN_LIMIT', type=int, help='remove old cache entries, keeping the latest RETAIN_LIMIT (default: 1)',
nargs='?', const=1)

args = parser.parse_args(cmdline_args)

for i, arg in enumerate(vars(args)):
if args.tags:
self.tag_set = frozenset(args.joblist)
else:
try:
self.idx_set = frozenset([int(s) for s in args.joblist])
except ValueError as e:
parser.error(e)

for arg in vars(args):
argval = getattr(args, arg)
setattr(self, arg, argval)
17 changes: 16 additions & 1 deletion lib/urlwatch/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import re
import subprocess
import textwrap
from typing import Iterable, Optional

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
Expand Down Expand Up @@ -196,7 +197,10 @@ def ignore_error(self, exception):

class Job(JobBase):
__required__ = ()
__optional__ = ('name', 'filter', 'max_tries', 'diff_tool', 'compared_versions', 'diff_filter', 'enabled', 'treat_new_as_changed', 'user_visible_url')
__optional__ = ('name', 'tags', 'filter', 'max_tries', 'diff_tool', 'compared_versions', 'diff_filter', 'enabled', 'treat_new_as_changed', 'user_visible_url')

def matches_tags(self, tags: set[str]):
    """Tell whether this job carries at least one of the given tags.

    ``self.tags`` may be ``None`` (the ``tags`` setter stores ``None``
    as-is) or empty; such a job never matches, instead of raising
    ``TypeError`` from ``None & tags``.

    :param tags: the tags to look for
    :returns: ``True`` if the job shares at least one tag with ``tags``,
        ``False`` otherwise
    """
    own_tags = self.tags
    if not own_tags:
        # No tags on this job: nothing can match.
        return False
    return not own_tags.isdisjoint(tags)

# determine if hyperlink "a" tag is used in HtmlReporter
def location_is_url(self):
Expand All @@ -208,6 +212,17 @@ def pretty_name(self):
def is_enabled(self):
    # A job with no ``enabled`` value counts as enabled; otherwise the
    # stored value itself is returned.
    flag = self.enabled
    if flag is None:
        return True
    return flag

@property
def tags(self) -> Optional[frozenset[str]]:
    """The tags stored for this job, or ``None`` if ``None`` was assigned."""
    return self._tags

@tags.setter
def tags(self, value: Optional[Iterable[str]]):
    # ``None`` is kept as-is; any other value is frozen into an immutable set.
    if value is None:
        self._tags = None
    elif isinstance(value, str):
        # A bare string is a single tag. Passing it straight to frozenset()
        # would split it into its individual characters.
        self._tags = frozenset((value,))
    else:
        self._tags = frozenset(value)


class ShellJob(Job):
"""Run a shell command and get its standard output"""
Expand Down
14 changes: 14 additions & 0 deletions lib/urlwatch/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,20 @@ def __init__(self, urlwatch_config, config_storage, cache_storage, urls_storage)
if hasattr(self.urlwatch_config, 'migrate_urls'):
self.urlwatch_config.migrate_cache(self)

def should_run(self, idx, job):
    """Decide whether ``job`` is selected for this run.

    :param idx: 0-based position of the job in the job list (as produced
        by ``enumerate``); the indexes in ``idx_set`` are 1-based, so the
        position is shifted by one before the lookup
    :param job: the job to check
    :returns: ``True`` if the job is enabled and selected, else ``False``

    Disabled jobs never run. An empty selection (no tags in tag mode, no
    indexes in index mode) selects every enabled job.
    """
    if not job.is_enabled():
        return False

    config = self.urlwatch_config
    if config.tags:
        # Tag mode: no tags given means all jobs, same as index mode.
        if not config.tag_set:
            return True
        return bool(job.matches_tags(config.tag_set))

    # Index mode: no indexes given means all jobs.
    if not config.idx_set:
        return True
    return (idx + 1) in config.idx_set

def check_directories(self):
if not os.path.exists(self.urlwatch_config.config):
self.config_storage.write_default_config(self.urlwatch_config.config)
Expand Down
17 changes: 17 additions & 0 deletions lib/urlwatch/tests/data/jobs-with-tags.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
name: UTC
command: date -u
tags:
- arg
- utc
---
name: RFC
command: date -R
tags:
- arg
- rfc
---
name: Local
command: date
tags:
- local
53 changes: 51 additions & 2 deletions lib/urlwatch/tests/test_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ def py_files():


class ConfigForTest(CommandConfig):
def __init__(self, config, urls, cache, hooks, verbose):
super().__init__([], 'urlwatch', os.path.dirname(__file__), root, config, urls, hooks, cache, verbose)
def __init__(self, config, urls, cache, hooks, verbose, args=None):
    # ``args`` is forwarded as the first argument of the parent constructor.
    # Default to a fresh empty list per call rather than a shared mutable
    # default.
    cmdline_args = [] if args is None else args
    super().__init__(cmdline_args, 'urlwatch', os.path.dirname(__file__), root, config, urls, hooks, cache, verbose)


@contextlib.contextmanager
Expand Down Expand Up @@ -133,6 +133,55 @@ def test_run_watcher():
cache_storage.close()


def prepare_tags_test(args):
    """Build an Urlwatch instance over the ``jobs-with-tags.yaml`` fixture.

    ``args`` is handed to ConfigForTest as its ``args`` keyword. Returns the
    ``(urlwatcher, cache_storage)`` pair; the tests in this file close the
    returned cache storage themselves.
    """
    data_dir = os.path.join(here, 'data')
    urls_file = os.path.join(data_dir, 'jobs-with-tags.yaml')
    config_file = os.path.join(data_dir, 'urlwatch.yaml')
    cache_file = os.path.join(data_dir, 'cache.db')

    config_storage = YamlConfigStorage(config_file)
    urls_storage = UrlsYaml(urls_file)
    cache_storage = CacheMiniDBStorage(cache_file)

    # No hooks file ('') and verbose output enabled.
    urlwatch_config = ConfigForTest(config_file, urls_file, cache_file, '', True, args=args)
    return Urlwatch(urlwatch_config, config_storage, cache_storage, urls_storage), cache_storage


def test_tags_none():
    """With no selection arguments, all three fixture jobs are run."""
    with teardown_func():
        watcher, storage = prepare_tags_test([])
        # closing() calls storage.close() on exit, whether or not the body raised.
        with contextlib.closing(storage):
            watcher.run_jobs()

            assert len(watcher.report.job_states) == 3


def test_tags_single():
    """Selecting the ``arg`` tag runs the two fixture jobs that carry it."""
    with teardown_func():
        watcher, storage = prepare_tags_test(['--tags', 'arg'])
        # closing() calls storage.close() on exit, whether or not the body raised.
        with contextlib.closing(storage):
            watcher.run_jobs()

            assert len(watcher.report.job_states) == 2


def test_tags_multiple():
    """Selecting ``utc`` and ``local`` runs the two jobs carrying either tag."""
    with teardown_func():
        watcher, storage = prepare_tags_test(['--tags', 'utc', 'local'])
        # closing() calls storage.close() on exit, whether or not the body raised.
        with contextlib.closing(storage):
            watcher.run_jobs()

            assert len(watcher.report.job_states) == 2


def test_disabled_job():
with teardown_func():
urls = os.path.join(here, 'data', 'disabled-job.yaml')
Expand Down
6 changes: 4 additions & 2 deletions lib/urlwatch/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ def run_parallel(func, items):


def run_jobs(urlwatcher):
if not all(1 <= idx <= len(urlwatcher.jobs) for idx in urlwatcher.urlwatch_config.joblist):
if not urlwatcher.urlwatch_config.tags and not all(1 <= idx <= len(urlwatcher.jobs) for idx in urlwatcher.urlwatch_config.idx_set):
raise ValueError(f'All job indices must be between 1 and {len(urlwatcher.jobs)}: {urlwatcher.urlwatch_config.joblist}')
cache_storage = urlwatcher.cache_storage
jobs = [job.with_defaults(urlwatcher.config_storage.config)
for (idx, job) in enumerate(urlwatcher.jobs) if job.is_enabled() and ((idx + 1) in urlwatcher.urlwatch_config.joblist or (not urlwatcher.urlwatch_config.joblist))]
for (idx, job) in enumerate(urlwatcher.jobs)
if urlwatcher.should_run(idx, job)
]
report = urlwatcher.report

logger.debug('Processing %d jobs (out of %d)', len(jobs), len(urlwatcher.jobs))
Expand Down

0 comments on commit 8b6b90e

Please sign in to comment.