Feat: release 2.8.0 - adds hpc support (#163)
* support hpc

* update pytest

* params changes

* adding a param for hpc_id

* update docs

* minor readme update

* removing wdl params

* default storage mode

* mini-typo in readme

* review suggestions
dapineyro authored Apr 8, 2024
1 parent d39754e commit c169460
Showing 7 changed files with 84 additions and 12 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
## lifebit-ai/cloudos-cli: changelog

## v2.8.0 (2024-04-05)

- Adds support for using CloudOS HPC executor.

## v2.7.0 (2024-03-21)

### Feature
34 changes: 33 additions & 1 deletion README.md
@@ -141,9 +141,12 @@ Options:
                                  backwards compatibility.
  --repository-platform TEXT      Name of the repository platform of the
                                  workflow. Default=github.
  --execution-platform [aws|azure]
  --execution-platform [aws|azure|hpc]
                                  Name of the execution platform implemented
                                  in your CloudOS. Default=aws.
  --hpc-id TEXT                   ID of your HPC, only applicable when
                                  --execution-platform=hpc.
                                  Default=660fae20f93358ad61e0104b
  --cost-limit FLOAT              Add a cost limit to your job. Default=30.0
                                  (For no cost limit please use -1).
  --verbose                       Whether to print information messages or
@@ -318,6 +321,35 @@ cloudos job run \
--execution-platform azure
```

#### HPC execution support

CloudOS can also use an HPC compute infrastructure. In that case, take the following into account when submitting jobs with the `cloudos job run` command:

- Use the following parameter: `--execution-platform hpc`.
- Indicate the HPC ID using: `--hpc-id XXXX`.

Example command:

```bash
cloudos job run \
--cloudos-url $CLOUDOS \
--apikey $MY_API_KEY \
--workspace-id $WORKSPACE_ID \
--project-name "$PROJECT_NAME" \
--workflow-name $WORKFLOW_NAME \
--job-config $JOB_PARAMS \
--execution-platform hpc \
--hpc-id $YOUR_HPC_ID
```

Please note that HPC execution does not support the following parameters; all of them will be ignored (see the sketch after this list):

- `--resumable`
- `--job-queue`
- `--instance-type` | `--instance-disk` | `--spot` | `--cost-limit`
- `--storage-mode` | `--lustre-size`
- `--wdl-mainfile` | `--wdl-importsfile` | `--cromwell-token`
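
For reference, here is a minimal sketch of how the CLI neutralises those options when `--execution-platform=hpc` is selected (simplified from the `cloudos/__main__.py` changes in this commit; not a public API):

```python
# Minimal sketch, simplified from cloudos/__main__.py in this commit:
# when the HPC platform is selected, the ignored options are reset so
# they never reach the job request payload.
if execution_platform == 'hpc':
    batch = None               # --job-queue is ignored
    spot = None                # --spot is ignored
    wdl_mainfile = None        # --wdl-mainfile is ignored
    wdl_importsfile = None     # --wdl-importsfile is ignored
    storage_mode = 'regular'   # --storage-mode / --lustre-size are ignored
```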

#### Check job status

To check the status of a submitted job, just use the suggested command:
31 changes: 28 additions & 3 deletions cloudos/__main__.py
@@ -190,8 +190,12 @@ def queue():
              default='github')
@click.option('--execution-platform',
              help='Name of the execution platform implemented in your CloudOS. Default=aws.',
              type=click.Choice(['aws', 'azure']),
              type=click.Choice(['aws', 'azure', 'hpc']),
              default='aws')
@click.option('--hpc-id',
              help=('ID of your HPC, only applicable when --execution-platform=hpc. ' +
                    'Default=660fae20f93358ad61e0104b'),
              default='660fae20f93358ad61e0104b')
@click.option('--cost-limit',
              help='Add a cost limit to your job. Default=30.0 (For no cost limit please use -1).',
              type=float,
@@ -238,6 +242,7 @@ def run(apikey,
        cromwell_token,
        repository_platform,
        execution_platform,
        hpc_id,
        cost_limit,
        verbose,
        request_interval,
@@ -249,9 +254,11 @@
    if instance_type == 'NONE_SELECTED':
        if execution_platform == 'aws':
            instance_type = 'c5.xlarge'
        if execution_platform == 'azure':
        elif execution_platform == 'azure':
            instance_type = 'Standard_D4as_v4'
    if execution_platform == 'azure':
        else:
            instance_type = None
    if execution_platform == 'azure' or execution_platform == 'hpc':
        batch = None
        spot = None
elif ignite:
@@ -261,10 +268,27 @@
              'CloudOS\n')
    else:
        batch = True
    if execution_platform == 'hpc':
        print('\nHPC execution platform selected')
        if hpc_id is None:
            raise ValueError('Please, specify your HPC ID using the --hpc-id parameter')
        print('[Message] Please, take into account that HPC execution does not support ' +
              'the following parameters and all of them will be ignored:\n' +
              '\t--resumable\n' +
              '\t--job-queue\n' +
              '\t--instance-type | --instance-disk | --spot | --cost-limit\n' +
              '\t--storage-mode | --lustre-size\n' +
              '\t--wdl-mainfile | --wdl-importsfile | --cromwell-token\n')
        wdl_mainfile = None
        wdl_importsfile = None
        storage_mode = 'regular'
    if verbose:
        print('\t...Detecting workflow type')
    cl = Cloudos(cloudos_url, apikey, cromwell_token)
    workflow_type = cl.detect_workflow(workflow_name, workspace_id, verify_ssl)
    if execution_platform == 'hpc' and workflow_type == 'wdl':
        raise ValueError(f'The workflow {workflow_name} is a WDL workflow. ' +
                         'WDL is not supported on the HPC execution platform.')
    if workflow_type == 'wdl':
        print('\tWDL workflow detected\n')
        if wdl_mainfile is None:
@@ -325,6 +349,7 @@
        storage_mode=storage_mode,
        lustre_size=lustre_size,
        execution_platform=execution_platform,
        hpc_id=hpc_id,
        workflow_type=workflow_type,
        cromwell_id=cromwell_id,
        cost_limit=cost_limit,
2 changes: 1 addition & 1 deletion cloudos/_version.py
@@ -1 +1 @@
__version__ = '2.7.0'
__version__ = '2.8.0'
20 changes: 14 additions & 6 deletions cloudos/jobs/job.py
@@ -199,6 +199,7 @@ def convert_nextflow_to_json(self,
                                 storage_mode,
                                 lustre_size,
                                 execution_platform,
                                 hpc_id,
                                 workflow_type,
                                 cromwell_id,
                                 cost_limit):
@@ -248,8 +249,10 @@ def convert_nextflow_to_json(self,
        lustre_size : int
            The lustre storage to be used when --storage-mode=lustre, in GB. It should be 1200 or
            a multiple of it.
        execution_platform : string ['aws'|'azure']
        execution_platform : string ['aws'|'azure'|'hpc']
            The execution platform implemented in your CloudOS.
        hpc_id : string
            The ID of your HPC in CloudOS.
        workflow_type : str
            The type of workflow to run. Either 'nextflow' or 'wdl'.
        cromwell_id : str
@@ -382,7 +385,6 @@ def convert_nextflow_to_json(self,
        if storage_mode not in ['lustre', 'regular']:
            raise ValueError('Please, use either \'lustre\' or \'regular\' for --storage-mode ' +
                             f'{storage_mode} is not allowed')

        params = {
            "parameters": workflow_params,
            "project": project_id,
@@ -396,6 +398,7 @@
            },
            "cromwellCloudResources": cromwell_id,
            "executionPlatform": execution_platform,
            "hpc": hpc_id,
            "storageSizeInGb": instance_disk,
            "execution": {
                "computeCostLimit": cost_limit,
@@ -405,14 +408,15 @@
"storageMode": storage_mode,
"revision": revision_block,
"profile": nextflow_profile,
instance: instance_type_block,
"masterInstance": {
instance: instance_type_block
}
if execution_platform != 'hpc':
params['masterInstance'] = {
"requestedInstance": {
"type": instance_type,
"asSpot": False
}
}
}
return params
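
For illustration, an HPC job payload assembled by `convert_nextflow_to_json` would look roughly like this (an abridged, hypothetical sketch consistent with the code above; all IDs are illustrative):

```python
# Abridged, hypothetical payload for an HPC job. Note there is no
# "masterInstance" block: it is only attached when
# execution_platform != 'hpc'.
hpc_job_request = {
    "project": "6054754029b82f0112762b9c",   # illustrative project ID
    "workflow": "60b0ca54303ee601a69b42d1",  # illustrative workflow ID
    "executionPlatform": "hpc",
    "hpc": "660fae20f93358ad61e0104b",       # value passed via --hpc-id
    "storageMode": "regular",                # forced back to 'regular' for HPC
}
```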

def send_job(self,
@@ -432,6 +436,7 @@
                 storage_mode='regular',
                 lustre_size=1200,
                 execution_platform='aws',
                 hpc_id=None,
                 workflow_type='nextflow',
                 cromwell_id=None,
                 cost_limit=30.0,
@@ -478,8 +483,10 @@
        lustre_size : int
            The lustre storage to be used when --storage-mode=lustre, in GB. It should be 1200 or
            a multiple of it.
        execution_platform : string ['aws'|'azure']
        execution_platform : string ['aws'|'azure'|'hpc']
            The execution platform implemented in your CloudOS.
        hpc_id : string
            The ID of your HPC in CloudOS.
        workflow_type : str
            The type of workflow to run. Either 'nextflow' or 'wdl'.
        cromwell_id : str
Expand Down Expand Up @@ -524,6 +531,7 @@ def send_job(self,
            storage_mode,
            lustre_size,
            execution_platform,
            hpc_id,
            workflow_type,
            cromwell_id,
            cost_limit)
2 changes: 1 addition & 1 deletion tests/test_data/convert_nextflow_to_json_params.json
@@ -1 +1 @@
{"parameters": [{"prefix": "--", "name": "reads", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data"}, {"prefix": "--", "name": "genome", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"}, {"prefix": "--", "name": "annot", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.bed.gff"}], "project": "6054754029b82f0112762b9c", "workflow": "60b0ca54303ee601a69b42d1", "name": "new_job", "resumable": true, "batch": {"dockerLogin": false, "enabled": false, "jobQueue": null}, "cromwellCloudResources": null, "executionPlatform": "aws", "storageSizeInGb": 500, "execution": {"computeCostLimit": -1, "optim": "test"}, "lusterFsxStorageSizeInGb": 1200, "storageMode": "regular", "revision": "", "profile": null, "spotInstances": {"instanceType": "c5.xlarge", "onDemandFallbackInstanceType": "c5.xlarge"}, "masterInstance": {"requestedInstance": {"type": "c5.xlarge", "asSpot": false}}}
{"parameters": [{"prefix": "--", "name": "reads", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data"}, {"prefix": "--", "name": "genome", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"}, {"prefix": "--", "name": "annot", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.bed.gff"}], "project": "6054754029b82f0112762b9c", "workflow": "60b0ca54303ee601a69b42d1", "name": "new_job", "resumable": true, "batch": {"dockerLogin": false, "enabled": false, "jobQueue": null}, "cromwellCloudResources": null, "executionPlatform": "aws", "hpc": null ,"storageSizeInGb": 500, "execution": {"computeCostLimit": -1, "optim": "test"}, "lusterFsxStorageSizeInGb": 1200, "storageMode": "regular", "revision": "", "profile": null, "spotInstances": {"instanceType": "c5.xlarge", "onDemandFallbackInstanceType": "c5.xlarge"}, "masterInstance": {"requestedInstance": {"type": "c5.xlarge", "asSpot": false}}}
3 changes: 3 additions & 0 deletions tests/test_jobs/test_convert_nextflow_to_json.py
@@ -24,6 +24,7 @@
"storage_mode": 'regular',
"lustre_size": 1200,
"execution_platform": "aws",
"hpc_id": None,
"workflow_type": 'nextflow',
"cromwell_id": None,
"cost_limit": -1
@@ -50,6 +51,7 @@ def test_convert_nextflow_to_json_output_correct():
        storage_mode=param_dict["storage_mode"],
        lustre_size=param_dict["lustre_size"],
        execution_platform=param_dict["execution_platform"],
        hpc_id=param_dict["hpc_id"],
        workflow_type=param_dict["workflow_type"],
        cromwell_id=param_dict["cromwell_id"],
        cost_limit=param_dict["cost_limit"]
@@ -81,6 +83,7 @@ def test_convert_nextflow_to_json_badly_formed_config():
        storage_mode=param_dict["storage_mode"],
        lustre_size=param_dict["lustre_size"],
        execution_platform=param_dict["execution_platform"],
        hpc_id=param_dict["hpc_id"],
        workflow_type=param_dict["workflow_type"],
        cromwell_id=param_dict["cromwell_id"],
        cost_limit=param_dict["cost_limit"]