From 83ba81f9bc118352fe4abbba9d58a30ddaf50821 Mon Sep 17 00:00:00 2001 From: hwikle-lanl <154543628+hwikle-lanl@users.noreply.github.com> Date: Thu, 6 Feb 2025 12:55:40 -0700 Subject: [PATCH] Prevent tests from transitioning to BUILD_CREATED state after failure (#813) * Prevent tests from transition to BUILD_CREATED state after failure Previously, tests were failing during the build phase and correctly logging a `BUILD_ERROR` state, but then incorrectly continuing on to log a `BUILD_COMPLETE` state, sometimes multiple times. This commit fixes that. It also fixes a double logging of an ABORTED state (albeit with different messages). Several issues with logging states remain: - In the aforementioned double ABORTED state, one log statement should probably be removed. - Nearly identical tests will show either an ENV_FAILED state, or an ENV_FAILED state followed by a BUILD_FAILED state. This inconsistency needs to be resolved, but a decision needs to be made as to the intended behavior. - Some log states appear to be used inappropriately, which should eventually be fixed. * Fix failing unit tests --- lib/pavilion/builder.py | 2 +- lib/pavilion/series/test_set.py | 2 +- lib/pavilion/status_file.py | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/lib/pavilion/builder.py b/lib/pavilion/builder.py index 3aebca3dc..e6f1443db 100644 --- a/lib/pavilion/builder.py +++ b/lib/pavilion/builder.py @@ -103,7 +103,7 @@ def __init__(self, pav_cfg: pavilion.config.PavConfig, working_dir: Path, config current_status = status.current() - if not self.path.exists() and "ERROR" not in current_status.state: + if not self.path.exists() and not STATES.is_fatal(current_status.state): status.set(state=STATES.BUILD_CREATED, note="Builder created.") self.tmp_log_path = self.path.with_suffix('.log') diff --git a/lib/pavilion/series/test_set.py b/lib/pavilion/series/test_set.py index 533318de3..e534d66ca 100644 --- a/lib/pavilion/series/test_set.py +++ b/lib/pavilion/series/test_set.py @@ -630,7 +630,7 @@ def _abort_builds(self, tests: List[TestRun]): for test in tests: if (test.status.current().state not in - (STATES.BUILD_FAILED, STATES.BUILD_ERROR)): + (STATES.BUILD_FAILED, STATES.BUILD_ERROR, STATES.ABORTED)): test.status.set( STATES.ABORTED, "Run aborted due to failures in other builds.") diff --git a/lib/pavilion/status_file.py b/lib/pavilion/status_file.py index 499e9cd8d..9835e14a4 100644 --- a/lib/pavilion/status_file.py +++ b/lib/pavilion/status_file.py @@ -71,6 +71,7 @@ def __init__(self): """Validate all of the constants.""" self._help = {} + self._fatal_states = {self.STATUS_ERROR} # Validate the built-in states for key in dir(self): @@ -109,6 +110,11 @@ def list(self): """List all the known state names.""" return self._help.keys() + def is_fatal(self, state) -> bool: + """Determine whether the given state is fatal.""" + + return state in self._fatal_states + class TestStatesStruct(StatesStruct): """Struct containing pre-defined test states.""" @@ -157,6 +163,15 @@ class TestStatesStruct(StatesStruct): SKIPPED = "The test has been skipped due to an invalid condition." COMPLETE = "For when the test is completely complete." + def __init__(self): + super().__init__() + + self._fatal_states = self._fatal_states.union( + {self.ABORTED, self.CREATION_ERROR, self.SCHED_CANCELLED, + self.SCHED_ERROR, self.BUILD_FAILED, self.BUILD_TIMEOUT, + self.BUILD_ERROR, self.CANCELLED, self.ENV_FAILED, + self.RUN_TIMEOUT, self.RUN_ERROR, self.RESULTS_ERROR}) + class SeriesStatesStruct(StatesStruct): """States for series objects.""" @@ -183,6 +198,13 @@ class SeriesStatesStruct(StatesStruct): ERROR = "General (fatal) error status." COMPLETE = "For when the test is completely complete." + def __init__(self): + super().__init__() + + self._fatal_states = self._fatal_states.union( + {self.CREATION_ERROR, self.BUILD_ERROR, self.KICKOFF_ERROR, + self.ERROR}) + # There is one predefined, global status object defined at module load time. STATES = TestStatesStruct()