Skip to content

Commit

Permalink
Merge pull request #2025 from ceph/nuke-unlock
Browse files: browse the repository at this point in the history
supervisor: Unlock nodes after reimage failure
  • Loading branch information
zmc authored Feb 13, 2025
2 parents 5eda30d + f688892 commit 62c9c10
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 1 deletion.
7 changes: 6 additions & 1 deletion scripts/node_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ def main():
if args.verbose:
teuthology.log.setLevel(logging.DEBUG)
log = logging.getLogger(__name__)
stale = query.find_stale_locks(args.owner)
try:
stale = query.find_stale_locks(args.owner)
except Exception:
log.exception(f"Error while check for stale locks held by {args.owner}")
return
if not stale:
return
by_owner = {}
Expand All @@ -30,6 +34,7 @@ def main():
else:
for owner, nodes in by_owner.items():
ops.unlock_safe([node["name"] for node in nodes], owner)
log.info(f"unlocked {len(stale)} nodes")

def parse_args(argv):
parser = argparse.ArgumentParser(
Expand Down
1 change: 1 addition & 0 deletions teuthology/dispatcher/supervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ def reimage(job_config):
reimaged = lock_ops.reimage_machines(ctx, targets, job_config['machine_type'])
except Exception as e:
log.exception('Reimaging error. Nuking machines...')
unlock_targets(job_config)
# Reimage failures should map to the 'dead' status instead of 'fail'
report.try_push_job_info(
ctx.config,
Expand Down
4 changes: 4 additions & 0 deletions teuthology/lock/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,5 +160,9 @@ def node_active_job(name: str, status: Union[dict, None] = None) -> Union[str, N
if resp.ok:
job_status = resp.json()["status"]
break
elif resp.status_code == 404:
break
else:
log.debug(f"Error {resp.status_code} listing job {run_name}/{job_id} for {name}: {resp.text}")
if job_status and job_status not in ('pass', 'fail', 'dead'):
return description

0 comments on commit 62c9c10

Please sign in to comment.