Skip to content

Commit

Permalink
Fix reclaim collector, tweak messages
Browse files Browse the repository at this point in the history
  • Loading branch information
dbutenhof committed Nov 28, 2023
1 parent 70c5f94 commit ebf281b
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 9 deletions.
5 changes: 4 additions & 1 deletion lib/pbench/cli/server/tree_manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import click
import humanfriendly
import humanize

from pbench.cli import pass_cli_context
from pbench.cli.server import config_setup
Expand Down Expand Up @@ -89,7 +90,9 @@ def tree_manage(
if reclaim_percent or reclaim_size:
target_size = humanfriendly.parse_size(reclaim_size) if reclaim_size else 0
target_pct = reclaim_percent if reclaim_percent else 20.0
click.echo(f"Reclaiming {target_pct}% or {target_size} bytes")
click.echo(
f"Reclaiming {target_pct}% or {humanize.naturalsize(target_size)} bytes"
)
outcome = cache_m.reclaim_cache(goal_pct=target_pct, goal_bytes=target_size)
un = "" if outcome else "un"
click.echo(f"The cache manager was {un}able to free the requested space")
Expand Down
31 changes: 23 additions & 8 deletions lib/pbench/server/cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import errno
import fcntl
from logging import Logger
import math
from pathlib import Path
import shlex
import shutil
Expand All @@ -19,6 +20,8 @@
from pbench.server.database.models.datasets import Dataset, DatasetNotFound, Metadata
from pbench.server.utils import get_tarball_md5

RECLAIM_BYTES_PAD = 1024 # Pad unpack reclaim requests by this much


class CacheManagerError(Exception):
"""Base class for exceptions raised from this module."""
Expand Down Expand Up @@ -939,7 +942,7 @@ def get_results(self, lock: LockManager) -> Path:
# enough room.
if self.controller and self.controller.cache_manager:
self.controller.cache_manager.reclaim_cache(
goal_bytes=self.get_unpacked_size()
goal_bytes=self.get_unpacked_size() + RECLAIM_BYTES_PAD
)

audit = None
Expand Down Expand Up @@ -1534,8 +1537,13 @@ class Candidate:

def reached_goal():
usage = shutil.disk_usage(self.cache_root)
available = float(usage.free) / float(usage.total) * 100.0
return bool(available >= goal_pct and usage.free >= goal_bytes)
return bool(usage.free >= goal)

# Our reclamation goal can be expressed as % of total, absolute bytes,
# or both. We normalize to a single "bytes free" goal.
usage = shutil.disk_usage(self.cache_root)
pct_as_bytes = math.ceil(usage.total * goal_pct / 100.0)
goal = max(pct_as_bytes, goal_bytes)

if reached_goal():
return True
Expand All @@ -1544,8 +1552,10 @@ def reached_goal():
reclaimed = 0
reclaim_failed = 0
self.logger.info(
"RECLAIM: looking for {}% or {} free",
"RECLAIM: looking for {} free (based on {}% of {} or {})",
humanize.naturalsize(goal),
goal_pct,
humanize.naturalsize(usage.total),
humanize.naturalsize(goal_bytes),
)

Expand All @@ -1557,11 +1567,16 @@ def reached_goal():
continue
total_count += 1
last_ref = 0.0
unpacked = None
for f in d.iterdir():
if f.name == "last_ref":
last_ref = f.stat().st_mtime
elif f.is_dir():
candidates.append(Candidate(last_ref, f))
unpacked = f
if last_ref and unpacked:
break
if unpacked:
candidates.append(Candidate(last_ref, unpacked))

# Sort the candidates by last_ref timestamp, putting the oldest at
# the head of the queue. We'll flush each cache tree until we reach
Expand Down Expand Up @@ -1606,11 +1621,11 @@ def reached_goal():
self.logger.error("RECLAIM {} failed with '{}'", name, error)
reached = reached_goal()
self.logger.info(
"RECLAIM summary: goal {}%, {}b {}met: "
"RECLAIM summary: goal {}%, {} {}: "
"{} datasets, {} had cache: {} reclaimed and {} errors",
goal_pct,
goal_bytes,
"" if reached else "not ",
humanize.naturalsize(goal_bytes),
"achieved" if reached else "failed",
total_count,
has_cache,
reclaimed,
Expand Down

0 comments on commit ebf281b

Please sign in to comment.