Feat: release 2.8.0 - adds hpc support (#163)
* support hpc

* update pytest

* params changes

* adding a param for hpc_id

* update docs

* minor readme update

* removing wdl params

* default storage mode

* mini-typo in readme

* review suggestions
dapineyro authored Apr 8, 2024
1 parent d39754e commit c169460
Showing 7 changed files with 84 additions and 12 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
## lifebit-ai/cloudos-cli: changelog

## v2.8.0 (2024-04-05)

- Adds support for using CloudOS HPC executor.

## v2.7.0 (2024-03-21)

### Feature
34 changes: 33 additions & 1 deletion README.md
@@ -141,9 +141,12 @@ Options:
                                  backwards compatibility.
  --repository-platform TEXT      Name of the repository platform of the
                                  workflow. Default=github.
  --execution-platform [aws|azure]
  --execution-platform [aws|azure|hpc]
                                  Name of the execution platform implemented
                                  in your CloudOS. Default=aws.
  --hpc-id TEXT                   ID of your HPC, only applicable when
                                  --execution-platform=hpc.
                                  Default=660fae20f93358ad61e0104b
  --cost-limit FLOAT              Add a cost limit to your job. Default=30.0
                                  (For no cost limit please use -1).
  --verbose                       Whether to print information messages or
@@ -318,6 +321,35 @@ cloudos job run \
--execution-platform azure
```

#### HPC execution support

CloudOS can also use an HPC compute infrastructure. In that case, take the following into account when submitting jobs with the `cloudos job run` command:

- Use the following parameter: `--execution-platform hpc`.
- Indicate the HPC ID using: `--hpc-id XXXX`.

Example command:

```bash
cloudos job run \
--cloudos-url $CLOUDOS \
--apikey $MY_API_KEY \
--workspace-id $WORKSPACE_ID \
--project-name "$PROJECT_NAME" \
--workflow-name $WORKFLOW_NAME \
--job-config $JOB_PARAMS \
--execution-platform hpc \
--hpc-id $YOUR_HPC_ID
```

Please note that HPC execution does not support the following parameters; all of them will be ignored (see the sketch after this list):

- `--resumable`
- `--job-queue`
- `--instance-type` | `--instance-disk` | `--spot` | `--cost-limit`
- `--storage-mode` | `--lustre-size`
- `--wdl-mainfile` | `--wdl-importsfile` | `--cromwell-token`
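
For reference, here is a minimal sketch of how the CLI neutralises those options when `--execution-platform=hpc` is selected (simplified from the `cloudos/__main__.py` changes in this commit; not a public API):

```python
# Minimal sketch, simplified from cloudos/__main__.py in this commit:
# when the HPC platform is selected, the ignored options are reset so
# they never reach the job request payload.
if execution_platform == 'hpc':
    batch = None               # --job-queue is ignored
    spot = None                # --spot is ignored
    wdl_mainfile = None        # --wdl-mainfile is ignored
    wdl_importsfile = None     # --wdl-importsfile is ignored
    storage_mode = 'regular'   # --storage-mode / --lustre-size are ignored
```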

#### Check job status

To check the status of a submitted job, just use the suggested command:
31 changes: 28 additions & 3 deletions cloudos/__main__.py
@@ -190,8 +190,12 @@ def queue():
              default='github')
@click.option('--execution-platform',
              help='Name of the execution platform implemented in your CloudOS. Default=aws.',
              type=click.Choice(['aws', 'azure']),
              type=click.Choice(['aws', 'azure', 'hpc']),
              default='aws')
@click.option('--hpc-id',
              help=('ID of your HPC, only applicable when --execution-platform=hpc. ' +
                    'Default=660fae20f93358ad61e0104b'),
              default='660fae20f93358ad61e0104b')
@click.option('--cost-limit',
              help='Add a cost limit to your job. Default=30.0 (For no cost limit please use -1).',
              type=float,
@@ -238,6 +242,7 @@ def run(apikey,
        cromwell_token,
        repository_platform,
        execution_platform,
        hpc_id,
        cost_limit,
        verbose,
        request_interval,
@@ -249,9 +254,11 @@
    if instance_type == 'NONE_SELECTED':
        if execution_platform == 'aws':
            instance_type = 'c5.xlarge'
        if execution_platform == 'azure':
        elif execution_platform == 'azure':
            instance_type = 'Standard_D4as_v4'
    if execution_platform == 'azure':
        else:
            instance_type = None
    if execution_platform == 'azure' or execution_platform == 'hpc':
        batch = None
        spot = None
elif ignite:
@@ -261,10 +268,27 @@
              'CloudOS\n')
    else:
        batch = True
    if execution_platform == 'hpc':
        print('\nHPC execution platform selected')
        if hpc_id is None:
            raise ValueError('Please, specify your HPC ID using the --hpc-id parameter')
        print('[Message] Please, take into account that HPC execution does not support ' +
              'the following parameters and all of them will be ignored:\n' +
              '\t--resumable\n' +
              '\t--job-queue\n' +
              '\t--instance-type | --instance-disk | --spot | --cost-limit\n' +
              '\t--storage-mode | --lustre-size\n' +
              '\t--wdl-mainfile | --wdl-importsfile | --cromwell-token\n')
        wdl_mainfile = None
        wdl_importsfile = None
        storage_mode = 'regular'
    if verbose:
        print('\t...Detecting workflow type')
    cl = Cloudos(cloudos_url, apikey, cromwell_token)
    workflow_type = cl.detect_workflow(workflow_name, workspace_id, verify_ssl)
    if execution_platform == 'hpc' and workflow_type == 'wdl':
        raise ValueError(f'The workflow {workflow_name} is a WDL workflow. ' +
                         'WDL is not supported on the HPC execution platform.')
    if workflow_type == 'wdl':
        print('\tWDL workflow detected\n')
        if wdl_mainfile is None:
@@ -325,6 +349,7 @@
        storage_mode=storage_mode,
        lustre_size=lustre_size,
        execution_platform=execution_platform,
        hpc_id=hpc_id,
        workflow_type=workflow_type,
        cromwell_id=cromwell_id,
        cost_limit=cost_limit,
2 changes: 1 addition & 1 deletion cloudos/_version.py
@@ -1 +1 @@
__version__ = '2.7.0'
__version__ = '2.8.0'
20 changes: 14 additions & 6 deletions cloudos/jobs/job.py
@@ -199,6 +199,7 @@ def convert_nextflow_to_json(self,
                                 storage_mode,
                                 lustre_size,
                                 execution_platform,
                                 hpc_id,
                                 workflow_type,
                                 cromwell_id,
                                 cost_limit):
@@ -248,8 +249,10 @@ def convert_nextflow_to_json(self,
        lustre_size : int
            The lustre storage to be used when --storage-mode=lustre, in GB. It should be 1200 or
            a multiple of it.
        execution_platform : string ['aws'|'azure']
        execution_platform : string ['aws'|'azure'|'hpc']
            The execution platform implemented in your CloudOS.
        hpc_id : string
            The ID of your HPC in CloudOS.
        workflow_type : str
            The type of workflow to run. Either 'nextflow' or 'wdl'.
        cromwell_id : str
@@ -382,7 +385,6 @@ def convert_nextflow_to_json(self,
        if storage_mode not in ['lustre', 'regular']:
            raise ValueError('Please, use either \'lustre\' or \'regular\' for --storage-mode ' +
                             f'{storage_mode} is not allowed')

        params = {
            "parameters": workflow_params,
            "project": project_id,
@@ -396,6 +398,7 @@
            },
            "cromwellCloudResources": cromwell_id,
            "executionPlatform": execution_platform,
            "hpc": hpc_id,
            "storageSizeInGb": instance_disk,
            "execution": {
                "computeCostLimit": cost_limit,
@@ -405,14 +408,15 @@
"storageMode": storage_mode,
"revision": revision_block,
"profile": nextflow_profile,
instance: instance_type_block,
"masterInstance": {
instance: instance_type_block
}
if execution_platform != 'hpc':
params['masterInstance'] = {
"requestedInstance": {
"type": instance_type,
"asSpot": False
}
}
}
return params
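
For illustration, an HPC job payload assembled by `convert_nextflow_to_json` would look roughly like this (an abridged, hypothetical sketch consistent with the code above; all IDs are illustrative):

```python
# Abridged, hypothetical payload for an HPC job. Note there is no
# "masterInstance" block: it is only attached when
# execution_platform != 'hpc'.
hpc_job_request = {
    "project": "6054754029b82f0112762b9c",   # illustrative project ID
    "workflow": "60b0ca54303ee601a69b42d1",  # illustrative workflow ID
    "executionPlatform": "hpc",
    "hpc": "660fae20f93358ad61e0104b",       # value passed via --hpc-id
    "storageMode": "regular",                # forced back to 'regular' for HPC
}
```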

def send_job(self,
@@ -432,6 +436,7 @@
                 storage_mode='regular',
                 lustre_size=1200,
                 execution_platform='aws',
                 hpc_id=None,
                 workflow_type='nextflow',
                 cromwell_id=None,
                 cost_limit=30.0,
@@ -478,8 +483,10 @@
        lustre_size : int
            The lustre storage to be used when --storage-mode=lustre, in GB. It should be 1200 or
            a multiple of it.
        execution_platform : string ['aws'|'azure']
        execution_platform : string ['aws'|'azure'|'hpc']
            The execution platform implemented in your CloudOS.
        hpc_id : string
            The ID of your HPC in CloudOS.
        workflow_type : str
            The type of workflow to run. Either 'nextflow' or 'wdl'.
        cromwell_id : str
Expand Down Expand Up @@ -524,6 +531,7 @@ def send_job(self,
            storage_mode,
            lustre_size,
            execution_platform,
            hpc_id,
            workflow_type,
            cromwell_id,
            cost_limit)
2 changes: 1 addition & 1 deletion tests/test_data/convert_nextflow_to_json_params.json
@@ -1 +1 @@
{"parameters": [{"prefix": "--", "name": "reads", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data"}, {"prefix": "--", "name": "genome", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"}, {"prefix": "--", "name": "annot", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.bed.gff"}], "project": "6054754029b82f0112762b9c", "workflow": "60b0ca54303ee601a69b42d1", "name": "new_job", "resumable": true, "batch": {"dockerLogin": false, "enabled": false, "jobQueue": null}, "cromwellCloudResources": null, "executionPlatform": "aws", "storageSizeInGb": 500, "execution": {"computeCostLimit": -1, "optim": "test"}, "lusterFsxStorageSizeInGb": 1200, "storageMode": "regular", "revision": "", "profile": null, "spotInstances": {"instanceType": "c5.xlarge", "onDemandFallbackInstanceType": "c5.xlarge"}, "masterInstance": {"requestedInstance": {"type": "c5.xlarge", "asSpot": false}}}
{"parameters": [{"prefix": "--", "name": "reads", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data"}, {"prefix": "--", "name": "genome", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"}, {"prefix": "--", "name": "annot", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.bed.gff"}], "project": "6054754029b82f0112762b9c", "workflow": "60b0ca54303ee601a69b42d1", "name": "new_job", "resumable": true, "batch": {"dockerLogin": false, "enabled": false, "jobQueue": null}, "cromwellCloudResources": null, "executionPlatform": "aws", "hpc": null ,"storageSizeInGb": 500, "execution": {"computeCostLimit": -1, "optim": "test"}, "lusterFsxStorageSizeInGb": 1200, "storageMode": "regular", "revision": "", "profile": null, "spotInstances": {"instanceType": "c5.xlarge", "onDemandFallbackInstanceType": "c5.xlarge"}, "masterInstance": {"requestedInstance": {"type": "c5.xlarge", "asSpot": false}}}
3 changes: 3 additions & 0 deletions tests/test_jobs/test_convert_nextflow_to_json.py
@@ -24,6 +24,7 @@
"storage_mode": 'regular',
"lustre_size": 1200,
"execution_platform": "aws",
"hpc_id": None,
"workflow_type": 'nextflow',
"cromwell_id": None,
"cost_limit": -1
@@ -50,6 +51,7 @@ def test_convert_nextflow_to_json_output_correct():
        storage_mode=param_dict["storage_mode"],
        lustre_size=param_dict["lustre_size"],
        execution_platform=param_dict["execution_platform"],
        hpc_id=param_dict["hpc_id"],
        workflow_type=param_dict["workflow_type"],
        cromwell_id=param_dict["cromwell_id"],
        cost_limit=param_dict["cost_limit"]
@@ -81,6 +83,7 @@ def test_convert_nextflow_to_json_badly_formed_config():
        storage_mode=param_dict["storage_mode"],
        lustre_size=param_dict["lustre_size"],
        execution_platform=param_dict["execution_platform"],
        hpc_id=param_dict["hpc_id"],
        workflow_type=param_dict["workflow_type"],
        cromwell_id=param_dict["cromwell_id"],
        cost_limit=param_dict["cost_limit"]