Skip to content

Commit

Permalink
Implement stdout & stderr redirection for salish
Browse files (browse the repository at this point in the history)
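This commit adds redirection of stdout and stderr to files within
`${RESULTS_DIR}` for the `salish` system, so that output and error streams are
captured in log files. The `mpirun` command appends its stdout to
`${RESULTS_DIR}/stdout` and its stderr to `${RESULTS_DIR}/stderr`, and the
script's progress `echo` messages are appended to `${RESULTS_DIR}/stdout`.
`_execute()` gains a `redirect_stdout_stderr` parameter, which
`_build_batch_script()` sets to true only when the system is `salish`.
New tests were added to verify the functionality for both the
`--deflate` and non-deflate cases, and the existing `_execute()` test cases
were updated to pass the new parameter.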
  • Loading branch information
douglatornell committed Nov 12, 2024
1 parent 7b0a114 commit 40dc91c
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 79 deletions.
72 changes: 43 additions & 29 deletions salishsea_cmd/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,12 +670,15 @@ def _build_batch_script(
f"{_pbs_directives(run_desc, nemo_processors + xios_processors, email, results_dir, procs_per_node, cpu_arch, )}\n",
)
)
redirect_stdout_stderr = True if SYSTEM == "salish" else False
script = "\n".join(
(
script,
f"{_definitions(run_desc, desc_file, run_dir, results_dir, deflate)}\n"
f"{_modules()}\n"
f"{_execute(nemo_processors, xios_processors, deflate, max_deflate_jobs, separate_deflate)}\n"
f"{_execute(
nemo_processors, xios_processors, deflate, max_deflate_jobs, separate_deflate,
redirect_stdout_stderr)}\n"
f"{_fix_permissions()}\n"
f"{_cleanup()}",
)
Expand Down Expand Up @@ -1034,8 +1037,18 @@ def _modules():


def _execute(
nemo_processors, xios_processors, deflate, max_deflate_jobs, separate_deflate
nemo_processors,
xios_processors,
deflate,
max_deflate_jobs,
separate_deflate,
redirect_stdout_stderr,
):
redirect = (
""
if not redirect_stdout_stderr
else " >>${RESULTS_DIR}/stdout 2>>${RESULTS_DIR}/stderr"
)
mpirun = {
"beluga": "mpirun",
"cedar": "mpirun",
Expand Down Expand Up @@ -1066,34 +1079,35 @@ def _execute(
}.get(SYSTEM, f"{mpirun} -np {nemo_processors} ./nemo.exe")
if xios_processors:
mpirun = {
"beluga": f"{mpirun} : -np {xios_processors} ./xios_server.exe",
"cedar": f"{mpirun} : -np {xios_processors} ./xios_server.exe",
"delta": f"{mpirun} : --bind-to core -np {xios_processors} ./xios_server.exe",
"graham": f"{mpirun} : -np {xios_processors} ./xios_server.exe",
"omega": f"{mpirun} : --bind-to core -np {xios_processors} ./xios_server.exe",
"orcinus": f"{mpirun} : -np {xios_processors} ./xios_server.exe",
"salish": f"{mpirun} : --bind-to none -np {xios_processors} ./xios_server.exe",
"seawolf1": f"{mpirun} : -np {xios_processors} ./xios_server.exe",
"seawolf2": f"{mpirun} : -np {xios_processors} ./xios_server.exe",
"seawolf3": f"{mpirun} : -np {xios_processors} ./xios_server.exe",
"sigma": f"{mpirun} : --bind-to core -np {xios_processors} ./xios_server.exe",
"sockeye": f"{mpirun} : --bind-to core -np {xios_processors} ./xios_server.exe",
"beluga": f"{mpirun} : -np {xios_processors} ./xios_server.exe{redirect}",
"cedar": f"{mpirun} : -np {xios_processors} ./xios_server.exe{redirect}",
"delta": f"{mpirun} : --bind-to core -np {xios_processors} ./xios_server.exe{redirect}",
"graham": f"{mpirun} : -np {xios_processors} ./xios_server.exe{redirect}",
"omega": f"{mpirun} : --bind-to core -np {xios_processors} ./xios_server.exe{redirect}",
"orcinus": f"{mpirun} : -np {xios_processors} ./xios_server.exe{redirect}",
"salish": f"{mpirun} : --bind-to none -np {xios_processors} ./xios_server.exe{redirect}",
"seawolf1": f"{mpirun} : -np {xios_processors} ./xios_server.exe{redirect}",
"seawolf2": f"{mpirun} : -np {xios_processors} ./xios_server.exe{redirect}",
"seawolf3": f"{mpirun} : -np {xios_processors} ./xios_server.exe{redirect}",
"sigma": f"{mpirun} : --bind-to core -np {xios_processors} ./xios_server.exe{redirect}",
"sockeye": f"{mpirun} : --bind-to core -np {xios_processors} ./xios_server.exe{redirect}",
}.get(
SYSTEM,
f"{mpirun} : -np {xios_processors} ./xios_server.exe",
f"{mpirun} : -np {xios_processors} ./xios_server.exe{redirect}",
)
redirect = "" if not redirect_stdout_stderr else " >>${RESULTS_DIR}/stdout"
script = textwrap.dedent(
f"""\
mkdir -p ${{RESULTS_DIR}}
cd ${{WORK_DIR}}
echo "working dir: $(pwd)"
echo "working dir: $(pwd)"{redirect}
echo "Starting run at $(date)"
echo "Starting run at $(date)"{redirect}
{mpirun}
MPIRUN_EXIT_CODE=$?
echo "Ended run at $(date)"
echo "Ended run at $(date)"{redirect}
echo "Results combining started at $(date)"
echo "Results combining started at $(date)"{redirect}
"""
)
if SYSTEM in {"delta", "omega", "sigma"}:
Expand All @@ -1111,16 +1125,16 @@ def _execute(
"""
)
script += textwrap.dedent(
"""\
${COMBINE} ${RUN_DESC} --debug
echo "Results combining ended at $(date)"
f"""\
${{COMBINE}} ${{RUN_DESC}} --debug
echo "Results combining ended at $(date)"{redirect}
"""
)
if deflate and not separate_deflate:
script += textwrap.dedent(
"""\
f"""\
echo "Results deflation started at $(date)"
echo "Results deflation started at $(date)"{redirect}
"""
)
if SYSTEM in {"beluga", "cedar", "graham"}:
Expand All @@ -1137,15 +1151,15 @@ def _execute(
${{DEFLATE}} *_ptrc_T*.nc *_prod_T*.nc *_carp_T*.nc *_grid_[TUVW]*.nc \\
*_turb_T*.nc *_dia[12n]_T*.nc FVCOM*.nc Slab_[UV]*.nc *_mtrc_T*.nc \\
--jobs {max_deflate_jobs} --debug
echo "Results deflation ended at $(date)"
echo "Results deflation ended at $(date)"{redirect}
"""
)
script += textwrap.dedent(
"""\
f"""\
echo "Results gathering started at $(date)"
${GATHER} ${RESULTS_DIR} --debug
echo "Results gathering ended at $(date)"
echo "Results gathering started at $(date)"{redirect}
${{GATHER}} ${{RESULTS_DIR}} --debug
echo "Results gathering ended at $(date)"{redirect}
"""
)
return script
Expand Down
168 changes: 118 additions & 50 deletions tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2525,35 +2525,35 @@ def test_salish(self, deflate, monkeypatch):
mkdir -p ${RESULTS_DIR}
cd ${WORK_DIR}
echo "working dir: $(pwd)"
echo "working dir: $(pwd)" >>${RESULTS_DIR}/stdout
echo "Starting run at $(date)"
/usr/bin/mpirun --bind-to none -np 7 ./nemo.exe : --bind-to none -np 1 ./xios_server.exe
echo "Starting run at $(date)" >>${RESULTS_DIR}/stdout
/usr/bin/mpirun --bind-to none -np 7 ./nemo.exe : --bind-to none -np 1 ./xios_server.exe >>${RESULTS_DIR}/stdout 2>>${RESULTS_DIR}/stderr
MPIRUN_EXIT_CODE=$?
echo "Ended run at $(date)"
echo "Ended run at $(date)" >>${RESULTS_DIR}/stdout
echo "Results combining started at $(date)"
echo "Results combining started at $(date)" >>${RESULTS_DIR}/stdout
${COMBINE} ${RUN_DESC} --debug
echo "Results combining ended at $(date)"
echo "Results combining ended at $(date)" >>${RESULTS_DIR}/stdout
"""
)
if deflate:
expected += textwrap.dedent(
"""\
echo "Results deflation started at $(date)"
echo "Results deflation started at $(date)" >>${RESULTS_DIR}/stdout
${DEFLATE} *_ptrc_T*.nc *_prod_T*.nc *_carp_T*.nc *_grid_[TUVW]*.nc \\
*_turb_T*.nc *_dia[12n]_T*.nc FVCOM*.nc Slab_[UV]*.nc *_mtrc_T*.nc \\
--jobs 4 --debug
echo "Results deflation ended at $(date)"
echo "Results deflation ended at $(date)" >>${RESULTS_DIR}/stdout
"""
)
expected += textwrap.dedent(
"""\
echo "Results gathering started at $(date)"
echo "Results gathering started at $(date)" >>${RESULTS_DIR}/stdout
${GATHER} ${RESULTS_DIR} --debug
echo "Results gathering ended at $(date)"
echo "Results gathering ended at $(date)" >>${RESULTS_DIR}/stdout
chmod go+rx ${RESULTS_DIR}
chmod g+rw ${RESULTS_DIR}/*
Expand Down Expand Up @@ -3207,10 +3207,6 @@ class TestExecute:
"omega",
"mpiexec -hostfile $(openmpi_nodefile) --bind-to core -np 42 ./nemo.exe : --bind-to core -np 1 ./xios_server.exe",
),
(
"salish",
"/usr/bin/mpirun --bind-to none -np 42 ./nemo.exe : --bind-to none -np 1 ./xios_server.exe",
),
(
"sigma",
"mpiexec -hostfile $(openmpi_nodefile) --bind-to core -np 42 ./nemo.exe : --bind-to core -np 1 ./xios_server.exe",
Expand All @@ -3221,15 +3217,18 @@ class TestExecute:
),
],
)
def test_execute_with_deflate(self, system, mpirun_cmd):
with patch("salishsea_cmd.run.SYSTEM", system):
script = salishsea_cmd.run._execute(
nemo_processors=42,
xios_processors=1,
deflate=True,
max_deflate_jobs=4,
separate_deflate=False,
)
def test_execute_with_deflate(self, system, mpirun_cmd, monkeypatch):
monkeypatch.setattr(salishsea_cmd.run, "SYSTEM", system)

script = salishsea_cmd.run._execute(
nemo_processors=42,
xios_processors=1,
deflate=True,
max_deflate_jobs=4,
separate_deflate=False,
redirect_stdout_stderr=False,
)

expected = textwrap.dedent(
f"""\
mkdir -p ${{RESULTS_DIR}}
Expand Down Expand Up @@ -3287,6 +3286,46 @@ def test_execute_with_deflate(self, system, mpirun_cmd):
)
assert script == expected

def test_salish_execute_with_deflate(self, monkeypatch):
# Check the script text that _execute() returns for the "salish" system when
# stdout/stderr redirection is requested and deflation runs inline
# (deflate=True, separate_deflate=False).
#
# monkeypatch replaces the module-level SYSTEM value for this test only.
monkeypatch.setattr(salishsea_cmd.run, "SYSTEM", "salish")

# redirect_stdout_stderr=True is the setting under test.
script = salishsea_cmd.run._execute(
nemo_processors=42,
xios_processors=1,
deflate=True,
max_deflate_jobs=4,
separate_deflate=False,
redirect_stdout_stderr=True,
)

# Doubled braces are f-string escapes: `${{RESULTS_DIR}}` below is the literal
# shell text `${RESULTS_DIR}`.
# Expected redirections: every `echo` line appends to ${RESULTS_DIR}/stdout, and
# the mpirun command appends stdout and stderr to ${RESULTS_DIR}/stdout and
# ${RESULTS_DIR}/stderr respectively. The ${COMBINE}, ${DEFLATE} and ${GATHER}
# command lines are expected with no redirection.
expected = textwrap.dedent(
f"""\
mkdir -p ${{RESULTS_DIR}}
cd ${{WORK_DIR}}
echo "working dir: $(pwd)" >>${{RESULTS_DIR}}/stdout
echo "Starting run at $(date)" >>${{RESULTS_DIR}}/stdout
/usr/bin/mpirun --bind-to none -np 42 ./nemo.exe : --bind-to none -np 1 ./xios_server.exe >>${{RESULTS_DIR}}/stdout 2>>${{RESULTS_DIR}}/stderr
MPIRUN_EXIT_CODE=$?
echo "Ended run at $(date)" >>${{RESULTS_DIR}}/stdout
echo "Results combining started at $(date)" >>${{RESULTS_DIR}}/stdout
${{COMBINE}} ${{RUN_DESC}} --debug
echo "Results combining ended at $(date)" >>${{RESULTS_DIR}}/stdout
echo "Results deflation started at $(date)" >>${{RESULTS_DIR}}/stdout
${{DEFLATE}} *_ptrc_T*.nc *_prod_T*.nc *_carp_T*.nc *_grid_[TUVW]*.nc \\
*_turb_T*.nc *_dia[12n]_T*.nc FVCOM*.nc Slab_[UV]*.nc *_mtrc_T*.nc \\
--jobs 4 --debug
echo "Results deflation ended at $(date)" >>${{RESULTS_DIR}}/stdout
echo "Results gathering started at $(date)" >>${{RESULTS_DIR}}/stdout
${{GATHER}} ${{RESULTS_DIR}} --debug
echo "Results gathering ended at $(date)" >>${{RESULTS_DIR}}/stdout
"""
)
# Exact string comparison: any change in wording, order, or whitespace of the
# generated script fails the test.
assert script == expected

@pytest.mark.parametrize(
"system, mpirun_cmd, deflate, separate_deflate",
[
Expand Down Expand Up @@ -3393,24 +3432,6 @@ def test_execute_with_deflate(self, system, mpirun_cmd):
True,
True,
),
(
"salish",
"/usr/bin/mpirun --bind-to none -np 42 ./nemo.exe : --bind-to none -np 1 ./xios_server.exe",
False,
True,
),
(
"salish",
"/usr/bin/mpirun --bind-to none -np 42 ./nemo.exe : --bind-to none -np 1 ./xios_server.exe",
False,
False,
),
(
"salish",
"/usr/bin/mpirun --bind-to none -np 42 ./nemo.exe : --bind-to none -np 1 ./xios_server.exe",
True,
True,
),
(
"sigma",
"mpiexec -hostfile $(openmpi_nodefile) --bind-to core -np 42 ./nemo.exe : --bind-to core -np 1 ./xios_server.exe",
Expand Down Expand Up @@ -3450,16 +3471,19 @@ def test_execute_with_deflate(self, system, mpirun_cmd):
],
)
def test_execute_without_deflate(
self, system, mpirun_cmd, deflate, separate_deflate
self, system, mpirun_cmd, deflate, separate_deflate, monkeypatch
):
with patch("salishsea_cmd.run.SYSTEM", system):
script = salishsea_cmd.run._execute(
nemo_processors=42,
xios_processors=1,
deflate=deflate,
max_deflate_jobs=4,
separate_deflate=separate_deflate,
)
monkeypatch.setattr(salishsea_cmd.run, "SYSTEM", system)

script = salishsea_cmd.run._execute(
nemo_processors=42,
xios_processors=1,
deflate=deflate,
max_deflate_jobs=4,
separate_deflate=separate_deflate,
redirect_stdout_stderr=False,
)

expected = textwrap.dedent(
f"""\
mkdir -p ${{RESULTS_DIR}}
Expand Down Expand Up @@ -3497,6 +3521,50 @@ def test_execute_without_deflate(
)
assert script == expected

# Each case is a (deflate, separate_deflate) pair. The same expected script,
# which contains no deflation commands, is asserted for all three cases.
@pytest.mark.parametrize(
"deflate, separate_deflate",
[
(False, True),
(False, False),
(True, True),
],
)
def test_salish_execute_without_deflate(
self, deflate, separate_deflate, monkeypatch
):
# Check the script text that _execute() returns for the "salish" system when
# stdout/stderr redirection is requested and no inline deflation is expected.
#
# monkeypatch replaces the module-level SYSTEM value for this test only.
monkeypatch.setattr(salishsea_cmd.run, "SYSTEM", "salish")

# redirect_stdout_stderr=True is the setting under test; deflate and
# separate_deflate come from the parametrization above.
script = salishsea_cmd.run._execute(
nemo_processors=7,
xios_processors=1,
deflate=deflate,
max_deflate_jobs=4,
separate_deflate=separate_deflate,
redirect_stdout_stderr=True,
)

# Doubled braces are f-string escapes: `${{RESULTS_DIR}}` below is the literal
# shell text `${RESULTS_DIR}`.
# Expected redirections: every `echo` line appends to ${RESULTS_DIR}/stdout, and
# the mpirun command appends stdout and stderr to ${RESULTS_DIR}/stdout and
# ${RESULTS_DIR}/stderr respectively. The ${COMBINE} and ${GATHER} command lines
# are expected with no redirection.
expected = textwrap.dedent(
f"""\
mkdir -p ${{RESULTS_DIR}}
cd ${{WORK_DIR}}
echo "working dir: $(pwd)" >>${{RESULTS_DIR}}/stdout
echo "Starting run at $(date)" >>${{RESULTS_DIR}}/stdout
/usr/bin/mpirun --bind-to none -np 7 ./nemo.exe : --bind-to none -np 1 ./xios_server.exe >>${{RESULTS_DIR}}/stdout 2>>${{RESULTS_DIR}}/stderr
MPIRUN_EXIT_CODE=$?
echo "Ended run at $(date)" >>${{RESULTS_DIR}}/stdout
echo "Results combining started at $(date)" >>${{RESULTS_DIR}}/stdout
${{COMBINE}} ${{RUN_DESC}} --debug
echo "Results combining ended at $(date)" >>${{RESULTS_DIR}}/stdout
echo "Results gathering started at $(date)" >>${{RESULTS_DIR}}/stdout
${{GATHER}} ${{RESULTS_DIR}} --debug
echo "Results gathering ended at $(date)" >>${{RESULTS_DIR}}/stdout
"""
)
# Exact string comparison: any change in wording, order, or whitespace of the
# generated script fails the test.
assert script == expected


class TestCleanup:
"""Unit test for _cleanup() function."""
Expand Down

0 comments on commit 40dc91c

Please sign in to comment.