
Fix slurm MPI submission bug #214

Open · wants to merge 10 commits into base: `main`
43 changes: 43 additions & 0 deletions docs/src/mpi.md
@@ -118,7 +118,50 @@ back to your interactive chain via:
pt = Pigeons.load(mpi_run) # possible thanks to 'pigeons(..., checkpoint = true)' used above
```

### Custom submission settings
Some clusters require submission settings that are not covered by `Pigeons`' defaults.
In these situations, you can specify custom submission settings.

To do so, define a rosetta of settings with [`add_custom_submission_system()`](@ref) and overload `resource_string()` for the system. The following example specifies custom settings for a Slurm system running OpenMPI, with `srun` used for submission.

```julia
params = (
    exec = "srun",
    submit = `sbatch`,
    del = `scancel`,
    directive = "#SBATCH",
    job_name = "--job-name=",
    output_file = "-o ",
    error_file = "-e ",
    submit_dir = "\$SLURM_SUBMIT_DIR",
    job_status = `squeue --job`,
    job_status_all = `squeue -u`,
    ncpu_info = `sinfo`
)

add_custom_submission_system(params)

function Pigeons.resource_string(m::MPIProcesses, ::Val{:custom})
    return """
    #SBATCH -t $(m.walltime)
    #SBATCH --ntasks=$(m.n_mpi_processes)
    #SBATCH --cpus-per-task=$(m.n_threads)
    #SBATCH --mem-per-cpu=$(m.memory)
    """
end
```
After defining these, set `submission_system = :custom` in your [`MPISettings`](@ref).
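For instance, a minimal sketch (assuming the rosetta and `resource_string` overload above have already been evaluated; cluster-specific `MPISettings` fields are omitted here):

```julia
# Select the custom submission system registered above.
# `setup_mpi` forwards its keyword arguments to the `MPISettings` constructor,
# so cluster-specific settings can be passed here as well.
Pigeons.setup_mpi(submission_system = :custom)
```
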
Some systems also require additional execution flags. Slurm systems using `srun` often need the MPI implementation specified with the `--mpi` flag.
Extra flags can be added to the execution command via `mpiexec_args` when constructing an [`MPIProcesses`](@ref).

For example, a cluster may require OpenMPI to use `pmi2`. This can be done by adding `` mpiexec_args = `--mpi=pmi2` `` to the arguments of `MPIProcesses`:

```julia
Pigeons.MPIProcesses(
...
mpiexec_args=`--mpi=pmi2`
)
```
## Code dependencies

So far we have used examples where the target, explorers, etc
2 changes: 1 addition & 1 deletion src/pt/pigeons.jl
@@ -1,5 +1,5 @@
"""
-$SIGNATURES
+$TYPEDSIGNATURES

Run (a generalization of) Parallel Tempering.

2 changes: 1 addition & 1 deletion src/submission/ChildProcess.jl
@@ -49,7 +49,7 @@ end


"""
-$SIGNATURES
+$TYPEDSIGNATURES

Run Parallel Tempering in a new process.
See [`ChildProcess`](@ref).
65 changes: 58 additions & 7 deletions src/submission/MPIProcesses.jl
@@ -8,7 +8,7 @@ In most contexts both 1 and 2 are needed for an ergonomic UI.

"""
Flag to run on MPI.
-Before using, you have to call once [`setup_mpi`](@ref).
+Settings can be changed by calling [`setup_mpi`](@ref) before running.

Fields:

@@ -49,7 +49,7 @@ $FIELDS
end

"""
-$SIGNATURES
+$TYPEDSIGNATURES
"""
function pigeons(pt_arguments, mpi_submission::MPIProcesses)
if !is_mpi_setup()
@@ -93,6 +93,12 @@ function mpi_submission_script(exec_folder, mpi_submission::MPIProcesses, julia_
r = rosetta()
resource_str = resource_string(mpi_submission, mpi_settings.submission_system)

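# srun launches the MPI processes directly; other systems fall back to mpiexec with output capture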
exec_str = (
r.exec == "srun" ?
string("srun -n \$SLURM_NTASKS $(join(mpi_submission.mpiexec_args.exec, " "))") :
string("mpiexec $(join(mpi_submission.mpiexec_args.exec, " ")) --merge-stderr-to-stdout --output-filename $(exec_folder)")
)

code = """
#!/bin/bash
$resource_str
@@ -109,7 +115,7 @@
# MethodError(f=Core.Compiler.widenconst, args=(Symbol("#342"),), world=0x0000000000001342)
export JULIA_PKG_PRECOMPILE_AUTO=0

-mpiexec $(mpi_submission.mpiexec_args) --merge-stderr-to-stdout --output-filename $exec_folder $julia_cmd_str
+$(exec_str) $julia_cmd_str
"""
script_path = "$exec_folder/.submission_script.sh"
write(script_path, code)
@@ -119,18 +125,63 @@ end

# Internal: "rosetta stone" of submission commands
const _rosetta = (;
-queue_concept = [:submit, :del, :directive, :job_name, :output_file, :error_file, :submit_dir, :job_status, :job_status_all, :ncpu_info],
+queue_concept = [:exec, :submit, :del, :directive, :job_name, :output_file, :error_file, :submit_dir, :job_status, :job_status_all, :ncpu_info],

# tested:
-pbs = [`qsub`, `qdel`, "#PBS", "-N ", "-o ", "-e ", "\$PBS_O_WORKDIR", `qstat -x`, `qstat -u`, `pbsnodes -aSj -F dsv`],
-slurm = [`sbatch`, `scancel`, "#SBATCH", "--job-name=", "-o ", "-e ", "\$SLURM_SUBMIT_DIR", `squeue --job`, `squeue -u`, `sinfo`],
+pbs = ["mpiexec", `qsub`, `qdel`, "#PBS", "-N ", "-o ", "-e ", "\$PBS_O_WORKDIR", `qstat -x`, `qstat -u`, `pbsnodes -aSj -F dsv`],
+slurm = ["mpiexec", `sbatch`, `scancel`, "#SBATCH", "--job-name=", "-o ", "-e ", "\$SLURM_SUBMIT_DIR", `squeue --job`, `squeue -u`, `sinfo`],

# not yet tested:
-lsf = [`bsub`, `bkill`, "#BSUB", "-J ", "-o ", "-e ", "\$LSB_SUBCWD", `bjobs`, `bjobs -u`, `bhosts`],
+lsf = ["mpiexec", `bsub`, `bkill`, "#BSUB", "-J ", "-o ", "-e ", "\$LSB_SUBCWD", `bjobs`, `bjobs -u`, `bhosts`],

custom = [] # can be used by downstream libraries/users to create custom submission commands in conjunction with dispatch on Pigeons.resource_string()
)

"""
$TYPEDSIGNATURES

Add a custom submission system to the rosetta stone.
This function expects a NamedTuple with the following fields:
- `exec` (String): the command to execute the job
- `submit` (Cmd): the command to submit the job
- `del` (Cmd): the command to delete the job
- `directive` (String): the directive to specify the job
- `job_name` (String): the flag to specify the job name
- `output_file` (String): the flag to specify the output file
- `error_file` (String): the flag to specify the error file
- `submit_dir` (String): the flag to specify the submit directory
- `job_status` (Cmd): the command to check the job status
- `job_status_all` (Cmd): the command to check the job status for all users
- `ncpu_info` (Cmd): the command to check the number of CPUs available
"""
function add_custom_submission_system(θ::NamedTuple)
    (; exec, submit, del, directive, job_name, output_file, error_file, submit_dir, job_status, job_status_all, ncpu_info) = θ

    # Clear any previously registered custom submission system
    empty!(Pigeons._rosetta.custom)

    append!(
        Pigeons._rosetta.custom,
        [
            exec,
            submit,
            del,
            directive,
            job_name,
            output_file,
            error_file,
            submit_dir,
            job_status,
            job_status_all,
            ncpu_info
        ]
    )
    @warn("Custom submission system added to the rosetta stone. You will also need to define a `resource_string` method for this submission system.")
    return nothing
end

supported_submission_systems() = filter(x -> x != :queue_concept && x != :custom, keys(_rosetta))

resource_string(m::MPIProcesses, symbol) = resource_string(m, Val(symbol))
10 changes: 4 additions & 6 deletions src/submission/MPISettings.jl
@@ -56,15 +56,13 @@ function load_mpi_settings()
end

"""
-$SIGNATURES
+$TYPEDSIGNATURES

Look first at the list of clusters that have "presets" available,
by typing `Pigeons.setup_mpi_` and then tab. These are the most
straightforward to use.

-If presets are not available, use `setup_mpi()`. To see the
-documentation of the arguments of `setup_mpi()`, see
-[`MPISettings`](@ref)
+Use `setup_mpi()` if presets are not available. See [`MPISettings`](@ref) for information on the arguments of `setup_mpi()`
+(i.e. `args...` are passed to the constructor of [`MPISettings`](@ref)).

Pull requests to `Pigeons/src/submission/presets.jl` are welcome
@@ -83,9 +81,9 @@ modules_string(settings::MPISettings) =
)

"""
-$SIGNATURES
+$TYPEDSIGNATURES

-Run this function once before running MPI jobs.
+Execute this function once before running MPI jobs.
This should be done on the head node of a compute cluster.
The settings are permanently saved.
See [`MPISettings`](@ref).