diff --git a/CHANGELOG.md b/CHANGELOG.md
index 229c438..c61eccf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 ## lifebit-ai/cloudos-cli: changelog
 
+## v2.8.0 (2024-04-05)
+
+- Adds support for using the CloudOS HPC executor.
+
 ## v2.7.0 (2024-03-21)
 
 ### Feature
diff --git a/README.md b/README.md
index 3a804db..dd3020a 100644
--- a/README.md
+++ b/README.md
@@ -141,9 +141,12 @@ Options:
                                   backwards compatibility.
   --repository-platform TEXT      Name of the repository platform of the
                                   workflow. Default=github.
-  --execution-platform [aws|azure]
+  --execution-platform [aws|azure|hpc]
                                   Name of the execution platform implemented
                                   in your CloudOS. Default=aws.
+  --hpc-id TEXT                   ID of your HPC, only applicable when
+                                  --execution-platform=hpc.
+                                  Default=660fae20f93358ad61e0104b
   --cost-limit FLOAT              Add a cost limit to your job. Default=30.0
                                   (For no cost limit please use -1).
   --verbose                       Whether to print information messages or
@@ -318,6 +321,35 @@ cloudos job run \
     --execution-platform azure
 ```
 
+#### HPC execution support
+
+CloudOS can also run jobs on an HPC compute infrastructure. In that case, take the following into account when submitting jobs with the `cloudos job run` command:
+
+- Use the following parameter: `--execution-platform hpc`.
+- Indicate the HPC ID using: `--hpc-id XXXX`.
+
+Example command:
+
+```bash
+cloudos job run \
+    --cloudos-url $CLOUDOS \
+    --apikey $MY_API_KEY \
+    --workspace-id $WORKSPACE_ID \
+    --project-name "$PROJECT_NAME" \
+    --workflow-name $WORKFLOW_NAME \
+    --job-config $JOB_PARAMS \
+    --execution-platform hpc \
+    --hpc-id $YOUR_HPC_ID
+```
+
+Please note that HPC execution does not support the following parameters, and all of them will be ignored:
+
+- `--resumable`
+- `--job-queue`
+- `--instance-type` | `--instance-disk` | `--spot` | `--cost-limit`
+- `--storage-mode` | `--lustre-size`
+- `--wdl-mainfile` | `--wdl-importsfile` | `--cromwell-token`
+
 #### Check job status
 
 To check the status of a submitted job, just use the suggested command:
diff --git a/cloudos/__main__.py b/cloudos/__main__.py
index aed834f..f33597a 100644
--- a/cloudos/__main__.py
+++ b/cloudos/__main__.py
@@ -190,8 +190,12 @@ def queue():
               default='github')
 @click.option('--execution-platform',
               help='Name of the execution platform implemented in your CloudOS. Default=aws.',
-              type=click.Choice(['aws', 'azure']),
+              type=click.Choice(['aws', 'azure', 'hpc']),
               default='aws')
+@click.option('--hpc-id',
+              help=('ID of your HPC, only applicable when --execution-platform=hpc. ' +
+                    'Default=660fae20f93358ad61e0104b'),
+              default='660fae20f93358ad61e0104b')
 @click.option('--cost-limit',
               help='Add a cost limit to your job. Default=30.0 (For no cost limit please use -1).',
               type=float,
@@ -238,6 +242,7 @@ def run(apikey,
         cromwell_token,
         repository_platform,
         execution_platform,
+        hpc_id,
         cost_limit,
         verbose,
         request_interval,
@@ -249,9 +254,11 @@ def run(apikey,
     if instance_type == 'NONE_SELECTED':
         if execution_platform == 'aws':
             instance_type = 'c5.xlarge'
-        if execution_platform == 'azure':
+        elif execution_platform == 'azure':
             instance_type = 'Standard_D4as_v4'
-    if execution_platform == 'azure':
+        else:
+            instance_type = None
+    if execution_platform == 'azure' or execution_platform == 'hpc':
         batch = None
         spot = None
     elif ignite:
@@ -261,10 +268,27 @@
               'CloudOS\n')
     else:
         batch = True
+    if execution_platform == 'hpc':
+        print('\nHPC execution platform selected')
+        if hpc_id is None:
+            raise ValueError('Please, specify your HPC ID using the --hpc-id parameter')
+        print('[Message] Please, take into account that HPC execution does not support ' +
+              'the following parameters and all of them will be ignored:\n' +
+              '\t--resumable\n' +
+              '\t--job-queue\n' +
+              '\t--instance-type | --instance-disk | --spot | --cost-limit\n' +
+              '\t--storage-mode | --lustre-size\n' +
+              '\t--wdl-mainfile | --wdl-importsfile | --cromwell-token\n')
+        wdl_mainfile = None
+        wdl_importsfile = None
+        storage_mode = 'regular'
     if verbose:
         print('\t...Detecting workflow type')
     cl = Cloudos(cloudos_url, apikey, cromwell_token)
     workflow_type = cl.detect_workflow(workflow_name, workspace_id, verify_ssl)
+    if execution_platform == 'hpc' and workflow_type == 'wdl':
+        raise ValueError(f'The workflow {workflow_name} is a WDL workflow. ' +
+                         'WDL is not supported on HPC execution platform.')
     if workflow_type == 'wdl':
         print('\tWDL workflow detected\n')
         if wdl_mainfile is None:
@@ -325,6 +349,7 @@ def run(apikey,
                           storage_mode=storage_mode,
                           lustre_size=lustre_size,
                           execution_platform=execution_platform,
+                          hpc_id=hpc_id,
                           workflow_type=workflow_type,
                           cromwell_id=cromwell_id,
                           cost_limit=cost_limit,
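For reference, the platform-dependent defaults applied by the branching in `cloudos/__main__.py` above can be summarised as follows; this is an illustrative sketch that mirrors the code and adds no behaviour of its own:

```python
# Illustrative only: default instance type applied when --instance-type is left
# as NONE_SELECTED, per execution platform (mirrors the branch in __main__.py).
DEFAULT_INSTANCE_TYPE = {
    'aws': 'c5.xlarge',
    'azure': 'Standard_D4as_v4',
    'hpc': None,  # instance selection is delegated to the HPC itself
}

# For both 'azure' and 'hpc', batch and spot settings are cleared (set to None).
# The 'hpc' branch additionally drops wdl_mainfile/wdl_importsfile and resets
# storage_mode to 'regular', since those options are ignored on HPC.
```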
diff --git a/cloudos/_version.py b/cloudos/_version.py
index efc8b7b..f2df444 100644
--- a/cloudos/_version.py
+++ b/cloudos/_version.py
@@ -1 +1 @@
-__version__ = '2.7.0'
\ No newline at end of file
+__version__ = '2.8.0'
diff --git a/cloudos/jobs/job.py b/cloudos/jobs/job.py
index ebe7d2e..b3fd916 100644
--- a/cloudos/jobs/job.py
+++ b/cloudos/jobs/job.py
@@ -199,6 +199,7 @@ def convert_nextflow_to_json(self,
                                  storage_mode,
                                  lustre_size,
                                  execution_platform,
+                                 hpc_id,
                                  workflow_type,
                                  cromwell_id,
                                  cost_limit):
@@ -248,8 +249,10 @@ def convert_nextflow_to_json(self,
         lustre_size : int
             The lustre storage to be used when --storage-mode=lustre, in GB. It should
             be 1200 or a multiple of it.
-        execution_platform : string ['aws'|'azure']
+        execution_platform : string ['aws'|'azure'|'hpc']
             The execution platform implemented in your CloudOS.
+        hpc_id : string
+            The ID of your HPC in CloudOS.
         workflow_type : str
             The type of workflow to run. Either 'nextflow' or 'wdl'.
         cromwell_id : str
@@ -382,7 +385,6 @@ def convert_nextflow_to_json(self,
         if storage_mode not in ['lustre', 'regular']:
             raise ValueError('Please, use either \'lustre\' or \'regular\' for --storage-mode ' +
                              f'{storage_mode} is not allowed')
-
         params = {
             "parameters": workflow_params,
             "project": project_id,
@@ -396,6 +398,7 @@
             },
             "cromwellCloudResources": cromwell_id,
             "executionPlatform": execution_platform,
+            "hpc": hpc_id,
             "storageSizeInGb": instance_disk,
             "execution": {
                 "computeCostLimit": cost_limit,
@@ -405,14 +408,15 @@
             "storageMode": storage_mode,
             "revision": revision_block,
             "profile": nextflow_profile,
-            instance: instance_type_block,
-            "masterInstance": {
+            instance: instance_type_block
+        }
+        if execution_platform != 'hpc':
+            params['masterInstance'] = {
                 "requestedInstance": {
                     "type": instance_type,
                     "asSpot": False
                 }
             }
-        }
         return params
 
     def send_job(self,
@@ -432,6 +436,7 @@
                  storage_mode='regular',
                  lustre_size=1200,
                  execution_platform='aws',
+                 hpc_id=None,
                  workflow_type='nextflow',
                  cromwell_id=None,
                  cost_limit=30.0,
@@ -478,8 +483,10 @@ def send_job(self,
         lustre_size : int
             The lustre storage to be used when --storage-mode=lustre, in GB. It should
             be 1200 or a multiple of it.
-        execution_platform : string ['aws'|'azure']
+        execution_platform : string ['aws'|'azure'|'hpc']
             The execution platform implemented in your CloudOS.
+        hpc_id : string
+            The ID of your HPC in CloudOS.
         workflow_type : str
             The type of workflow to run. Either 'nextflow' or 'wdl'.
         cromwell_id : str
@@ -524,6 +531,7 @@ def send_job(self,
                                                  storage_mode,
                                                  lustre_size,
                                                  execution_platform,
+                                                 hpc_id,
                                                  workflow_type,
                                                  cromwell_id,
                                                  cost_limit)
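To make the payload change concrete, the fields of the `params` dictionary touched by this diff differ between platforms roughly as sketched below; values are illustrative (the HPC ID shown is the default used elsewhere in this patch) and all other fields are unchanged:

```python
# Illustrative sketch of the affected fields built by convert_nextflow_to_json().
aws_fields = {
    "executionPlatform": "aws",
    "hpc": None,
    "masterInstance": {  # only included when the platform is not 'hpc'
        "requestedInstance": {"type": "c5.xlarge", "asSpot": False}
    },
}

hpc_fields = {
    "executionPlatform": "hpc",
    "hpc": "660fae20f93358ad61e0104b",  # value supplied via --hpc-id
    # no "masterInstance" block: the HPC scheduler manages compute resources
}
```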
"s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.bed.gff"}], "project": "6054754029b82f0112762b9c", "workflow": "60b0ca54303ee601a69b42d1", "name": "new_job", "resumable": true, "batch": {"dockerLogin": false, "enabled": false, "jobQueue": null}, "cromwellCloudResources": null, "executionPlatform": "aws", "hpc": null ,"storageSizeInGb": 500, "execution": {"computeCostLimit": -1, "optim": "test"}, "lusterFsxStorageSizeInGb": 1200, "storageMode": "regular", "revision": "", "profile": null, "spotInstances": {"instanceType": "c5.xlarge", "onDemandFallbackInstanceType": "c5.xlarge"}, "masterInstance": {"requestedInstance": {"type": "c5.xlarge", "asSpot": false}}} diff --git a/tests/test_jobs/test_convert_nextflow_to_json.py b/tests/test_jobs/test_convert_nextflow_to_json.py index e48341c..46fdc27 100644 --- a/tests/test_jobs/test_convert_nextflow_to_json.py +++ b/tests/test_jobs/test_convert_nextflow_to_json.py @@ -24,6 +24,7 @@ "storage_mode": 'regular', "lustre_size": 1200, "execution_platform": "aws", + "hpc_id": None, "workflow_type": 'nextflow', "cromwell_id": None, "cost_limit": -1 @@ -50,6 +51,7 @@ def test_convert_nextflow_to_json_output_correct(): storage_mode=param_dict["storage_mode"], lustre_size=param_dict["lustre_size"], execution_platform=param_dict["execution_platform"], + hpc_id=param_dict["hpc_id"], workflow_type=param_dict["workflow_type"], cromwell_id=param_dict["cromwell_id"], cost_limit=param_dict["cost_limit"] @@ -81,6 +83,7 @@ def test_convert_nextflow_to_json_badly_formed_config(): storage_mode=param_dict["storage_mode"], lustre_size=param_dict["lustre_size"], execution_platform=param_dict["execution_platform"], + hpc_id=param_dict["hpc_id"], workflow_type=param_dict["workflow_type"], cromwell_id=param_dict["cromwell_id"], cost_limit=param_dict["cost_limit"]