From 3266a5bbb82896602e019679cdb9fab9a40f9975 Mon Sep 17 00:00:00 2001
From: Ukjae Jeong
Date: Mon, 21 Oct 2024 10:53:24 +0100
Subject: [PATCH] Fix some argo lint errors in examples folder (#1244)

**Pull Request Checklist**

- [ ] ~~Fixes~~ #1163
- [ ] ~~Tests added~~
- [ ] ~~Documentation/examples added~~
- [x] [Good commit messages](https://cbea.ms/git-commit/) and/or PR title

**Description of PR**

Fixed argo lint errors in following dirs:

- examples/workflows/use-case
- examples/workflows/scripts
- examples/workflows/misc

---------

Signed-off-by: Ukjae Jeong
---
 docs/examples/workflows/misc/data.md           |  3 +-
 docs/examples/workflows/misc/env.md            |  2 ++
 docs/examples/workflows/misc/env_from.md       |  2 ++
 docs/examples/workflows/misc/http.md           |  6 ++--
 docs/examples/workflows/misc/suspend.md        |  3 +-
 .../workflows/scripts/callable_script.md       |  3 +-
 .../workflows/scripts/callable_script_v1.md    |  3 +-
 .../workflows/use-cases/fine_tune_llama.md     | 12 +++----
 .../workflows/use-cases/map_reduce.md          | 19 ++++++++++---
 .../use-cases/spacy_inference_pipeline.md      | 12 ++++----
 docs/examples/workflows/use-cases/spark.md     | 28 +++++++++----------
 examples/workflows/misc/data.py                |  2 +-
 examples/workflows/misc/data.yaml              |  1 +
 examples/workflows/misc/env-from.yaml          |  1 +
 examples/workflows/misc/env.py                 |  1 +
 examples/workflows/misc/env.yaml               |  1 +
 examples/workflows/misc/env_from.py            |  1 +
 examples/workflows/misc/http.py                |  4 +--
 examples/workflows/misc/http.yaml              |  2 ++
 examples/workflows/misc/suspend.py             |  2 +-
 examples/workflows/misc/suspend.yaml           |  1 +
 .../workflows/scripts/callable-script-v1.yaml  |  1 +
 .../workflows/scripts/callable-script.yaml     |  1 +
 examples/workflows/scripts/callable_script.py  |  2 +-
 .../workflows/scripts/callable_script_v1.py    |  2 +-
 .../workflows/use_cases/fine-tune-llama.yaml   |  2 --
 .../workflows/use_cases/fine_tune_llama.py     | 10 +++----
 examples/workflows/use_cases/map-reduce.yaml   |  7 ++++-
 examples/workflows/use_cases/map_reduce.py     | 12 ++++++--
 .../use_cases/spacy-inference-pipeline.yaml    |  6 ++--
 .../use_cases/spacy_inference_pipeline.py      |  6 ++--
 examples/workflows/use_cases/spark.py          |  8 +++---
 examples/workflows/use_cases/spark.yaml        | 20 ++++++-------
 33 files changed, 110 insertions(+), 76 deletions(-)

diff --git a/docs/examples/workflows/misc/data.md b/docs/examples/workflows/misc/data.md
index 856b60fd9..46f4f6f83 100644
--- a/docs/examples/workflows/misc/data.md
+++ b/docs/examples/workflows/misc/data.md
@@ -16,7 +16,7 @@
         Workflow,
     )
 
-    with Workflow(generate_name="data-") as w:
+    with Workflow(generate_name="data-", entrypoint="list-log-files") as w:
         Data(
             name="list-log-files",
             source=S3Artifact(name="test-bucket", bucket="my-bucket"),
@@ -33,6 +33,7 @@
     metadata:
       generateName: data-
     spec:
+      entrypoint: list-log-files
       templates:
      - data:
          source:
diff --git a/docs/examples/workflows/misc/env.md b/docs/examples/workflows/misc/env.md
index f9f7789ab..9b7c4038d 100644
--- a/docs/examples/workflows/misc/env.md
+++ b/docs/examples/workflows/misc/env.md
@@ -21,6 +21,7 @@
 
     with Workflow(generate_name="secret-env-from-", entrypoint="whalesay") as w:
         whalesay = Container(
+            name="whalesay",
             image="docker/whalesay:latest",
             command=["cowsay"],
             env_from=[
@@ -76,5 +77,6 @@
               optional: false
             prefix: abc
           image: docker/whalesay:latest
+          name: whalesay
     ```
 
diff --git a/docs/examples/workflows/misc/env_from.md b/docs/examples/workflows/misc/env_from.md
index 8ce660c05..1697eab69 100644
--- a/docs/examples/workflows/misc/env_from.md
+++ b/docs/examples/workflows/misc/env_from.md
@@ -12,6 +12,7 @@
 
     with Workflow(generate_name="secret-env-from-", entrypoint="whalesay") as w:
         whalesay = Container(
+            name="whalesay",
             image="docker/whalesay:latest",
             command=["cowsay"],
             env_from=[
@@ -44,5 +45,6 @@
               optional: false
             prefix: abc
           image: docker/whalesay:latest
+          name: whalesay
     ```
 
diff --git a/docs/examples/workflows/misc/http.md b/docs/examples/workflows/misc/http.md
index 239f0b659..ea0ad9497 100644
--- a/docs/examples/workflows/misc/http.md
+++ b/docs/examples/workflows/misc/http.md
@@ -11,10 +11,10 @@
     from hera.expr import g
     from hera.workflows import HTTP, Parameter, Workflow
 
-    with Workflow(generate_name="http-") as w:
+    with Workflow(generate_name="http-", entrypoint="http") as w:
         HTTP(
             name="http",
-            inputs=[Parameter(name="url")],
+            inputs=[Parameter(name="url", value="https://example.com")],
             timeout_seconds=20,
             url=f"{g.inputs.parameters.url:$}",
             method="GET",
@@ -32,6 +32,7 @@
     metadata:
       generateName: http-
     spec:
+      entrypoint: http
       templates:
       - http:
           body: test body
@@ -45,6 +46,7 @@
         inputs:
           parameters:
           - name: url
+            value: https://example.com
         name: http
     ```
 
diff --git a/docs/examples/workflows/misc/suspend.md b/docs/examples/workflows/misc/suspend.md
index e7bb7b873..3ed11c79f 100644
--- a/docs/examples/workflows/misc/suspend.md
+++ b/docs/examples/workflows/misc/suspend.md
@@ -10,7 +10,7 @@
     ```python linenums="1"
     from hera.workflows import Parameter, Suspend, Workflow
 
-    with Workflow(generate_name="suspend-") as w:
+    with Workflow(generate_name="suspend-", entrypoint="suspend-without-duration") as w:
         Suspend(name="suspend-without-duration")
         Suspend(name="suspend-with-duration", duration=30)
         Suspend(
@@ -31,6 +31,7 @@
     metadata:
       generateName: suspend-
     spec:
+      entrypoint: suspend-without-duration
       templates:
       - name: suspend-without-duration
         suspend: {}
diff --git a/docs/examples/workflows/scripts/callable_script.md b/docs/examples/workflows/scripts/callable_script.md
index e42700523..c625001a7 100644
--- a/docs/examples/workflows/scripts/callable_script.md
+++ b/docs/examples/workflows/scripts/callable_script.md
@@ -92,7 +92,7 @@
         return Output(output=[annotated_input_value])
 
 
-    with Workflow(name="my-workflow") as w:
+    with Workflow(name="my-workflow", entrypoint="my-steps") as w:
         with Steps(name="my-steps") as s:
             my_function(arguments={"input": Input(a=2, b="bar", c=42)})
             str_function(arguments={"input": serialize(Input(a=2, b="bar", c=42))})
@@ -109,6 +109,7 @@
     metadata:
       name: my-workflow
     spec:
+      entrypoint: my-steps
       templates:
       - name: my-steps
         steps:
diff --git a/docs/examples/workflows/scripts/callable_script_v1.md b/docs/examples/workflows/scripts/callable_script_v1.md
index 83c898d0c..861c3e1b4 100644
--- a/docs/examples/workflows/scripts/callable_script_v1.md
+++ b/docs/examples/workflows/scripts/callable_script_v1.md
@@ -95,7 +95,7 @@
         return Output(output=[annotated_input_value])
 
 
-    with Workflow(name="my-workflow") as w:
+    with Workflow(name="my-workflow", entrypoint="my-steps") as w:
         with Steps(name="my-steps") as s:
             my_function(arguments={"input": Input(a=2, b="bar", c=42)})
             str_function(arguments={"input": serialize(Input(a=2, b="bar", c=42))})
@@ -112,6 +112,7 @@
     metadata:
       name: my-workflow
     spec:
+      entrypoint: my-steps
      templates:
      - name: my-steps
        steps:
diff --git a/docs/examples/workflows/use-cases/fine_tune_llama.md b/docs/examples/workflows/use-cases/fine_tune_llama.md
index 0c6bd1c6e..4e3397302 100644
--- a/docs/examples/workflows/use-cases/fine_tune_llama.md
+++ b/docs/examples/workflows/use-cases/fine_tune_llama.md
@@ -39,10 +39,10 @@ There are several implicit dependencies in this script:
     NUM_NODES = 4
 
     """`create_ssd_storage_class` defines the K8s storage class required for an ssd that's created dynamically.
-    
+
     K8s will create the necessary PersistentVolumeClaim and PersistentVolume resources when a pod requests a volume
-    rather than when the PVC/PV are _defined_. This helps avoid the risk of pod + volume zone mismatches. Note that this
-    was tested in GCP / GKE specifically. If you have a different cloud provider you have to change the `provisioner`
+    rather than when the PVC/PV are _defined_. This helps avoid the risk of pod + volume zone mismatches. Note that this
+    was tested in GCP / GKE specifically. If you have a different cloud provider you have to change the `provisioner`
     field.
     """
     create_ssd_storage_class = Resource(
@@ -144,7 +144,7 @@ There are several implicit dependencies in this script:
 
     """The delete resource removes the etcd client load balancer and the stateful set.
 
-    Useful for cases when you want to dynamically spin up an etcd cluster and then delete it after the client application
+    Useful for cases when you want to dynamically spin up an etcd cluster and then delete it after the client application
     is done.
     """
     delete_etcd_resources = Resource(
@@ -244,8 +244,6 @@ There are several implicit dependencies in this script:
                 mount_path="/kubecon_na_23_llama2_finetune/finetune",
                 name="{{inputs.parameters.node-vol}}",
             ),
-            # in addition, we set a volume mount for the empty dir volume that we use for communication between GPUs
-            m.VolumeMount(mount_path="/dev/shm", name="gpu-comm"),
         ],
     )
 
@@ -540,8 +538,6 @@ There are several implicit dependencies in this script:
             name: '{{inputs.parameters.node-vol}}'
           - mountPath: /dev/shm
             name: gpu-comm
-          - mountPath: /dev/shm
-            name: gpu-comm
         inputs:
           parameters:
          - name: rdvz-id
diff --git a/docs/examples/workflows/use-cases/map_reduce.md b/docs/examples/workflows/use-cases/map_reduce.md
index a3e18914c..3cd52e292 100644
--- a/docs/examples/workflows/use-cases/map_reduce.md
+++ b/docs/examples/workflows/use-cases/map_reduce.md
@@ -34,7 +34,10 @@ See the upstream example [here](https://github.com/argoproj/argo-workflows/blob/
     @script(
         image="python:alpine3.6",
-        inputs=Artifact(name="part", path="/mnt/in/part.json"),
+        inputs=[
+            Parameter(name="part_id"),
+            Artifact(name="part", path="/mnt/in/part.json"),
+        ],
         outputs=S3Artifact(
             name="part",
             path="/mnt/out/part.json",
@@ -76,10 +79,13 @@ See the upstream example [here](https://github.com/argoproj/argo-workflows/blob/
     with Workflow(generate_name="map-reduce-", entrypoint="main", arguments=Parameter(name="num_parts", value="4")) as w:
         with DAG(name="main"):
-            s = split(arguments=Parameter(name="num_parts", value="{{workflow.parameters.numParts}}"))
+            s = split(arguments=Parameter(name="num_parts", value="{{workflow.parameters.num_parts}}"))
             m = map_(
                 with_param=s.result,
-                arguments=S3Artifact(name="part", key="{{workflow.name}}/parts/{{item}}.json"),
+                arguments=[
+                    Parameter(name="part_id", value="{{item}}"),
+                    S3Artifact(name="part", key="{{workflow.name}}/parts/{{item}}.json"),
+                ],
             )
             s >> m >> reduce()
     ```
@@ -103,7 +109,7 @@ See the upstream example [here](https://github.com/argoproj/argo-workflows/blob/
       - arguments:
           parameters:
           - name: num_parts
-            value: '{{workflow.parameters.numParts}}'
+            value: '{{workflow.parameters.num_parts}}'
         name: split
         template: split
       - arguments:
@@ -111,6 +117,9 @@ See the upstream example [here](https://github.com/argoproj/argo-workflows/blob/
           - name: part
             s3:
               key: '{{workflow.name}}/parts/{{item}}.json'
+          parameters:
+          - name: part_id
+            value: '{{item}}'
         depends: split
         name: map-
         template: map-
@@ -156,6 +165,8 @@ See the upstream example [here](https://github.com/argoproj/argo-workflows/blob/
         artifacts:
         - name: part
           path: /mnt/in/part.json
+        parameters:
+        - name: part_id
       name: map-
       outputs:
        artifacts:
diff --git a/docs/examples/workflows/use-cases/spacy_inference_pipeline.md b/docs/examples/workflows/use-cases/spacy_inference_pipeline.md
index e2d786990..35299761a 100644
--- a/docs/examples/workflows/use-cases/spacy_inference_pipeline.md
+++ b/docs/examples/workflows/use-cases/spacy_inference_pipeline.md
@@ -111,13 +111,13 @@ Step 2: Performs inference on the dataset in the volume path /mnt/data using Spa
 
     with Workflow(
-        generate_name="spacy_inference_pipeline-",
-        entrypoint="spacy_inference_pipeline",
+        generate_name="spacy-inference-pipeline-",
+        entrypoint="spacy-inference-pipeline",
         volumes=[Volume(name="data-dir", size="1Gi", mount_path="/mnt/data")],
         service_account_name="hera",
         namespace="argo",
     ) as w:
-        with Steps(name="spacy_inference_pipeline") as steps:
+        with Steps(name="spacy-inference-pipeline") as steps:
             data_prep(name="data-prep")
             inference_spacy(name="inference-spacy")
     ```
@@ -128,13 +128,13 @@ Step 2: Performs inference on the dataset in the volume path /mnt/data using Spa
     apiVersion: argoproj.io/v1alpha1
     kind: Workflow
     metadata:
-      generateName: spacy_inference_pipeline-
+      generateName: spacy-inference-pipeline-
       namespace: argo
     spec:
-      entrypoint: spacy_inference_pipeline
+      entrypoint: spacy-inference-pipeline
       serviceAccountName: hera
       templates:
-      - name: spacy_inference_pipeline
+      - name: spacy-inference-pipeline
         steps:
         - - name: data-prep
             template: data-prep
diff --git a/docs/examples/workflows/use-cases/spark.md b/docs/examples/workflows/use-cases/spark.md
index b2803fef9..9d7f76874 100644
--- a/docs/examples/workflows/use-cases/spark.md
+++ b/docs/examples/workflows/use-cases/spark.md
@@ -15,7 +15,7 @@ compares a regular Pandas dataframe with a Spark dataframe. Inspired by: https:/
 
     @script(image="jupyter/pyspark-notebook:latest", resources=Resources(cpu_request=4, memory_request="8Gi"))
-    def spark(n: int) -> None:
+    def spark(num_data: int) -> None:
         import random
         import subprocess
         import time
 
@@ -33,7 +33,7 @@ compares a regular Pandas dataframe with a Spark dataframe. Inspired by: https:/
         spark = SparkSession.builder.master("local[1]").appName("my-spark-example-running-in-hera.com").getOrCreate()
 
         # let's compare a regular dataframe vs a spark dataframe! First, we define the data to use
-        data, columns = [random.randint(0, n) for _ in range(n)], ["value"]
+        data, columns = [random.randint(0, num_data) for _ in range(num_data)], ["value"]
 
         # as a very simple and naive comparison, let's print out the average, min, and max of both dataframes
         # and now let's create a regular Pandas dataframe
@@ -58,8 +58,8 @@ compares a regular Pandas dataframe with a Spark dataframe. Inspired by: https:/
     with Workflow(generate_name="spark-", entrypoint="d") as w:
         with DAG(name="d"):
-            for i, n in enumerate([1_000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]):
-                spark(name="spark-{i}".format(i=i), arguments={"n": n})
+            for i, num_data in enumerate([1_000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]):
+                spark(name="spark-{i}".format(i=i), arguments={"num_data": num_data})
     ```
 
 === "YAML"
@@ -76,44 +76,44 @@ compares a regular Pandas dataframe with a Spark dataframe. Inspired by: https:/
           tasks:
           - arguments:
               parameters:
-              - name: n
+              - name: num_data
                 value: '1000'
             name: spark-0
             template: spark
          - arguments:
              parameters:
-              - name: n
+              - name: num_data
                value: '10000'
            name: spark-1
            template: spark
          - arguments:
              parameters:
-              - name: n
+              - name: num_data
                value: '100000'
            name: spark-2
            template: spark
          - arguments:
              parameters:
-              - name: n
+              - name: num_data
                value: '1000000'
            name: spark-3
            template: spark
          - arguments:
              parameters:
-              - name: n
+              - name: num_data
                value: '10000000'
            name: spark-4
            template: spark
          - arguments:
              parameters:
-              - name: n
+              - name: num_data
                value: '100000000'
            name: spark-5
            template: spark
        name: d
      - inputs:
          parameters:
-          - name: n
+          - name: num_data
        name: spark
        script:
          command:
@@ -128,8 +128,8 @@ compares a regular Pandas dataframe with a Spark dataframe. Inspired by: https:/
             import sys
             sys.path.append(os.getcwd())
             import json
-            try: n = json.loads(r'''{{inputs.parameters.n}}''')
-            except: n = r'''{{inputs.parameters.n}}'''
+            try: num_data = json.loads(r'''{{inputs.parameters.num_data}}''')
+            except: num_data = r'''{{inputs.parameters.num_data}}'''
 
             import random
             import subprocess
@@ -138,7 +138,7 @@ compares a regular Pandas dataframe with a Spark dataframe. Inspired by: https:/
             import pandas as pd
             from pyspark.sql import SparkSession
             spark = SparkSession.builder.master('local[1]').appName('my-spark-example-running-in-hera.com').getOrCreate()
-            (data, columns) = ([random.randint(0, n) for _ in range(n)], ['value'])
+            (data, columns) = ([random.randint(0, num_data) for _ in range(num_data)], ['value'])
             pandas_df = pd.DataFrame(data=data, columns=columns)
             start = time.time()
             pandas_result = pandas_df.describe()
diff --git a/examples/workflows/misc/data.py b/examples/workflows/misc/data.py
index b43f0e7fd..a610095a4 100644
--- a/examples/workflows/misc/data.py
+++ b/examples/workflows/misc/data.py
@@ -6,7 +6,7 @@
     Workflow,
 )
 
-with Workflow(generate_name="data-") as w:
+with Workflow(generate_name="data-", entrypoint="list-log-files") as w:
     Data(
         name="list-log-files",
         source=S3Artifact(name="test-bucket", bucket="my-bucket"),
diff --git a/examples/workflows/misc/data.yaml b/examples/workflows/misc/data.yaml
index 6df5d2073..198902d4e 100644
--- a/examples/workflows/misc/data.yaml
+++ b/examples/workflows/misc/data.yaml
@@ -3,6 +3,7 @@ kind: Workflow
 metadata:
   generateName: data-
 spec:
+  entrypoint: list-log-files
   templates:
   - data:
       source:
diff --git a/examples/workflows/misc/env-from.yaml b/examples/workflows/misc/env-from.yaml
index 4410c5934..7a89fb962 100644
--- a/examples/workflows/misc/env-from.yaml
+++ b/examples/workflows/misc/env-from.yaml
@@ -18,3 +18,4 @@ spec:
           optional: false
         prefix: abc
       image: docker/whalesay:latest
+      name: whalesay
diff --git a/examples/workflows/misc/env.py b/examples/workflows/misc/env.py
index cdf1c2995..b577ab224 100644
--- a/examples/workflows/misc/env.py
+++ b/examples/workflows/misc/env.py
@@ -11,6 +11,7 @@
 
 with Workflow(generate_name="secret-env-from-", entrypoint="whalesay") as w:
     whalesay = Container(
+        name="whalesay",
         image="docker/whalesay:latest",
         command=["cowsay"],
         env_from=[
diff --git a/examples/workflows/misc/env.yaml b/examples/workflows/misc/env.yaml
index 0dd6dd594..3c332d072 100644
--- a/examples/workflows/misc/env.yaml
+++ b/examples/workflows/misc/env.yaml
@@ -35,3 +35,4 @@ spec:
           optional: false
         prefix: abc
       image: docker/whalesay:latest
+      name: whalesay
diff --git a/examples/workflows/misc/env_from.py b/examples/workflows/misc/env_from.py
index 560264d56..2b276841b 100644
--- a/examples/workflows/misc/env_from.py
+++ b/examples/workflows/misc/env_from.py
@@ -2,6 +2,7 @@
 
 with Workflow(generate_name="secret-env-from-", entrypoint="whalesay") as w:
     whalesay = Container(
+        name="whalesay",
         image="docker/whalesay:latest",
         command=["cowsay"],
         env_from=[
diff --git a/examples/workflows/misc/http.py b/examples/workflows/misc/http.py
index 260beab14..d00027e76 100644
--- a/examples/workflows/misc/http.py
+++ b/examples/workflows/misc/http.py
@@ -1,10 +1,10 @@
 from hera.expr import g
 from hera.workflows import HTTP, Parameter, Workflow
 
-with Workflow(generate_name="http-") as w:
+with Workflow(generate_name="http-", entrypoint="http") as w:
     HTTP(
         name="http",
-        inputs=[Parameter(name="url")],
+        inputs=[Parameter(name="url", value="https://example.com")],
         timeout_seconds=20,
         url=f"{g.inputs.parameters.url:$}",
         method="GET",
diff --git a/examples/workflows/misc/http.yaml b/examples/workflows/misc/http.yaml
index 6f877365e..9d9353969 100644
--- a/examples/workflows/misc/http.yaml
+++ b/examples/workflows/misc/http.yaml
@@ -3,6 +3,7 @@ kind: Workflow
 metadata:
   generateName: http-
 spec:
+  entrypoint: http
   templates:
   - http:
       body: test body
@@ -16,4 +17,5 @@ spec:
     inputs:
       parameters:
       - name: url
+        value: https://example.com
     name: http
diff --git a/examples/workflows/misc/suspend.py b/examples/workflows/misc/suspend.py
index 5ec92d227..dd780eee8 100644
--- a/examples/workflows/misc/suspend.py
+++ b/examples/workflows/misc/suspend.py
@@ -1,6 +1,6 @@
 from hera.workflows import Parameter, Suspend, Workflow
 
-with Workflow(generate_name="suspend-") as w:
+with Workflow(generate_name="suspend-", entrypoint="suspend-without-duration") as w:
     Suspend(name="suspend-without-duration")
     Suspend(name="suspend-with-duration", duration=30)
     Suspend(
diff --git a/examples/workflows/misc/suspend.yaml b/examples/workflows/misc/suspend.yaml
index 6bfbe7f6f..5cdddf8ce 100644
--- a/examples/workflows/misc/suspend.yaml
+++ b/examples/workflows/misc/suspend.yaml
@@ -3,6 +3,7 @@ kind: Workflow
 metadata:
   generateName: suspend-
 spec:
+  entrypoint: suspend-without-duration
   templates:
   - name: suspend-without-duration
     suspend: {}
diff --git a/examples/workflows/scripts/callable-script-v1.yaml b/examples/workflows/scripts/callable-script-v1.yaml
index 9e9f2144f..d709e4fbf 100644
--- a/examples/workflows/scripts/callable-script-v1.yaml
+++ b/examples/workflows/scripts/callable-script-v1.yaml
@@ -3,6 +3,7 @@ kind: Workflow
 metadata:
   name: my-workflow
 spec:
+  entrypoint: my-steps
   templates:
   - name: my-steps
     steps:
diff --git a/examples/workflows/scripts/callable-script.yaml b/examples/workflows/scripts/callable-script.yaml
index 8be25a25d..37cf41a86 100644
--- a/examples/workflows/scripts/callable-script.yaml
+++ b/examples/workflows/scripts/callable-script.yaml
@@ -3,6 +3,7 @@ kind: Workflow
 metadata:
   name: my-workflow
 spec:
+  entrypoint: my-steps
   templates:
   - name: my-steps
     steps:
diff --git a/examples/workflows/scripts/callable_script.py b/examples/workflows/scripts/callable_script.py
index 424d24066..064120392 100644
--- a/examples/workflows/scripts/callable_script.py
+++ b/examples/workflows/scripts/callable_script.py
@@ -82,7 +82,7 @@ def function_kebab_object(annotated_input_value: Annotated[Input, Parameter(name
     return Output(output=[annotated_input_value])
 
 
-with Workflow(name="my-workflow") as w:
+with Workflow(name="my-workflow", entrypoint="my-steps") as w:
     with Steps(name="my-steps") as s:
         my_function(arguments={"input": Input(a=2, b="bar", c=42)})
         str_function(arguments={"input": serialize(Input(a=2, b="bar", c=42))})
diff --git a/examples/workflows/scripts/callable_script_v1.py b/examples/workflows/scripts/callable_script_v1.py
index beb7ab35b..a60243e12 100644
--- a/examples/workflows/scripts/callable_script_v1.py
+++ b/examples/workflows/scripts/callable_script_v1.py
@@ -85,7 +85,7 @@ def function_kebab_object(annotated_input_value: Annotated[Input, Parameter(name
     return Output(output=[annotated_input_value])
 
 
-with Workflow(name="my-workflow") as w:
+with Workflow(name="my-workflow", entrypoint="my-steps") as w:
     with Steps(name="my-steps") as s:
         my_function(arguments={"input": Input(a=2, b="bar", c=42)})
         str_function(arguments={"input": serialize(Input(a=2, b="bar", c=42))})
diff --git a/examples/workflows/use_cases/fine-tune-llama.yaml b/examples/workflows/use_cases/fine-tune-llama.yaml
index a44b03d97..fbd349128 100644
--- a/examples/workflows/use_cases/fine-tune-llama.yaml
+++ b/examples/workflows/use_cases/fine-tune-llama.yaml
@@ -226,8 +226,6 @@ spec:
         name: '{{inputs.parameters.node-vol}}'
       - mountPath: /dev/shm
         name: gpu-comm
-      - mountPath: /dev/shm
-        name: gpu-comm
     inputs:
       parameters:
      - name: rdvz-id
diff --git a/examples/workflows/use_cases/fine_tune_llama.py b/examples/workflows/use_cases/fine_tune_llama.py
index 4dd99879a..16185ac47 100644
--- a/examples/workflows/use_cases/fine_tune_llama.py
+++ b/examples/workflows/use_cases/fine_tune_llama.py
@@ -32,10 +32,10 @@
 NUM_NODES = 4
 
 """`create_ssd_storage_class` defines the K8s storage class required for an ssd that's created dynamically.
-
+
 K8s will create the necessary PersistentVolumeClaim and PersistentVolume resources when a pod requests a volume
-rather than when the PVC/PV are _defined_. This helps avoid the risk of pod + volume zone mismatches. Note that this
-was tested in GCP / GKE specifically. If you have a different cloud provider you have to change the `provisioner`
+rather than when the PVC/PV are _defined_. This helps avoid the risk of pod + volume zone mismatches. Note that this
+was tested in GCP / GKE specifically. If you have a different cloud provider you have to change the `provisioner`
 field.
 """
 create_ssd_storage_class = Resource(
@@ -137,7 +137,7 @@
 
 """The delete resource removes the etcd client load balancer and the stateful set.
 
-Useful for cases when you want to dynamically spin up an etcd cluster and then delete it after the client application
+Useful for cases when you want to dynamically spin up an etcd cluster and then delete it after the client application
 is done.
 """
 delete_etcd_resources = Resource(
@@ -237,8 +237,6 @@
             mount_path="/kubecon_na_23_llama2_finetune/finetune",
             name="{{inputs.parameters.node-vol}}",
         ),
-        # in addition, we set a volume mount for the empty dir volume that we use for communication between GPUs
-        m.VolumeMount(mount_path="/dev/shm", name="gpu-comm"),
     ],
 )
 
diff --git a/examples/workflows/use_cases/map-reduce.yaml b/examples/workflows/use_cases/map-reduce.yaml
index 6e62af8a0..b5601cb92 100644
--- a/examples/workflows/use_cases/map-reduce.yaml
+++ b/examples/workflows/use_cases/map-reduce.yaml
@@ -14,7 +14,7 @@ spec:
       - arguments:
           parameters:
           - name: num_parts
-            value: '{{workflow.parameters.numParts}}'
+            value: '{{workflow.parameters.num_parts}}'
         name: split
         template: split
       - arguments:
@@ -22,6 +22,9 @@ spec:
           - name: part
             s3:
               key: '{{workflow.name}}/parts/{{item}}.json'
+          parameters:
+          - name: part_id
+            value: '{{item}}'
         depends: split
         name: map-
         template: map-
@@ -67,6 +70,8 @@ spec:
       artifacts:
       - name: part
         path: /mnt/in/part.json
+      parameters:
+      - name: part_id
     name: map-
     outputs:
       artifacts:
diff --git a/examples/workflows/use_cases/map_reduce.py b/examples/workflows/use_cases/map_reduce.py
index 839ac7e26..91ab51be2 100644
--- a/examples/workflows/use_cases/map_reduce.py
+++ b/examples/workflows/use_cases/map_reduce.py
@@ -28,7 +28,10 @@ def split(num_parts: int) -> None:
 
 @script(
     image="python:alpine3.6",
-    inputs=Artifact(name="part", path="/mnt/in/part.json"),
+    inputs=[
+        Parameter(name="part_id"),
+        Artifact(name="part", path="/mnt/in/part.json"),
+    ],
     outputs=S3Artifact(
         name="part",
         path="/mnt/out/part.json",
@@ -70,9 +73,12 @@ def reduce() -> None:
 
 with Workflow(generate_name="map-reduce-", entrypoint="main", arguments=Parameter(name="num_parts", value="4")) as w:
     with DAG(name="main"):
-        s = split(arguments=Parameter(name="num_parts", value="{{workflow.parameters.numParts}}"))
+        s = split(arguments=Parameter(name="num_parts", value="{{workflow.parameters.num_parts}}"))
         m = map_(
             with_param=s.result,
-            arguments=S3Artifact(name="part", key="{{workflow.name}}/parts/{{item}}.json"),
+            arguments=[
+                Parameter(name="part_id", value="{{item}}"),
+                S3Artifact(name="part", key="{{workflow.name}}/parts/{{item}}.json"),
+            ],
         )
         s >> m >> reduce()
diff --git a/examples/workflows/use_cases/spacy-inference-pipeline.yaml b/examples/workflows/use_cases/spacy-inference-pipeline.yaml
index fc8a18e25..ef5b1ce40 100644
--- a/examples/workflows/use_cases/spacy-inference-pipeline.yaml
+++ b/examples/workflows/use_cases/spacy-inference-pipeline.yaml
@@ -1,13 +1,13 @@
 apiVersion: argoproj.io/v1alpha1
 kind: Workflow
 metadata:
-  generateName: spacy_inference_pipeline-
+  generateName: spacy-inference-pipeline-
   namespace: argo
 spec:
-  entrypoint: spacy_inference_pipeline
+  entrypoint: spacy-inference-pipeline
   serviceAccountName: hera
   templates:
-  - name: spacy_inference_pipeline
+  - name: spacy-inference-pipeline
     steps:
     - - name: data-prep
         template: data-prep
diff --git a/examples/workflows/use_cases/spacy_inference_pipeline.py b/examples/workflows/use_cases/spacy_inference_pipeline.py
index 5e3943375..d8aa9a59e 100644
--- a/examples/workflows/use_cases/spacy_inference_pipeline.py
+++ b/examples/workflows/use_cases/spacy_inference_pipeline.py
@@ -105,12 +105,12 @@ class NEROutput(BaseModel):
 
 
 with Workflow(
-    generate_name="spacy_inference_pipeline-",
-    entrypoint="spacy_inference_pipeline",
+    generate_name="spacy-inference-pipeline-",
+    entrypoint="spacy-inference-pipeline",
     volumes=[Volume(name="data-dir", size="1Gi", mount_path="/mnt/data")],
     service_account_name="hera",
     namespace="argo",
 ) as w:
-    with Steps(name="spacy_inference_pipeline") as steps:
+    with Steps(name="spacy-inference-pipeline") as steps:
         data_prep(name="data-prep")
         inference_spacy(name="inference-spacy")
diff --git a/examples/workflows/use_cases/spark.py b/examples/workflows/use_cases/spark.py
index 4352d2d76..eb383bd4b 100644
--- a/examples/workflows/use_cases/spark.py
+++ b/examples/workflows/use_cases/spark.py
@@ -8,7 +8,7 @@
 
 
 @script(image="jupyter/pyspark-notebook:latest", resources=Resources(cpu_request=4, memory_request="8Gi"))
-def spark(n: int) -> None:
+def spark(num_data: int) -> None:
     import random
     import subprocess
     import time
@@ -26,7 +26,7 @@ def spark(n: int) -> None:
     spark = SparkSession.builder.master("local[1]").appName("my-spark-example-running-in-hera.com").getOrCreate()
 
     # let's compare a regular dataframe vs a spark dataframe! First, we define the data to use
-    data, columns = [random.randint(0, n) for _ in range(n)], ["value"]
+    data, columns = [random.randint(0, num_data) for _ in range(num_data)], ["value"]
 
     # as a very simple and naive comparison, let's print out the average, min, and max of both dataframes
     # and now let's create a regular Pandas dataframe
@@ -51,5 +51,5 @@ def spark(n: int) -> None:
 
 with Workflow(generate_name="spark-", entrypoint="d") as w:
     with DAG(name="d"):
-        for i, n in enumerate([1_000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]):
-            spark(name="spark-{i}".format(i=i), arguments={"n": n})
+        for i, num_data in enumerate([1_000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]):
+            spark(name="spark-{i}".format(i=i), arguments={"num_data": num_data})
diff --git a/examples/workflows/use_cases/spark.yaml b/examples/workflows/use_cases/spark.yaml
index 9001cbab0..4e83dafc2 100644
--- a/examples/workflows/use_cases/spark.yaml
+++ b/examples/workflows/use_cases/spark.yaml
@@ -9,44 +9,44 @@ spec:
       tasks:
       - arguments:
           parameters:
-          - name: n
+          - name: num_data
            value: '1000'
        name: spark-0
        template: spark
      - arguments:
          parameters:
-          - name: n
+          - name: num_data
            value: '10000'
        name: spark-1
        template: spark
      - arguments:
          parameters:
-          - name: n
+          - name: num_data
            value: '100000'
        name: spark-2
        template: spark
      - arguments:
          parameters:
-          - name: n
+          - name: num_data
            value: '1000000'
        name: spark-3
        template: spark
      - arguments:
          parameters:
-          - name: n
+          - name: num_data
            value: '10000000'
        name: spark-4
        template: spark
      - arguments:
          parameters:
-          - name: n
+          - name: num_data
            value: '100000000'
        name: spark-5
        template: spark
    name: d
  - inputs:
      parameters:
-      - name: n
+      - name: num_data
    name: spark
    script:
      command:
@@ -61,8 +61,8 @@ spec:
         import sys
         sys.path.append(os.getcwd())
         import json
-        try: n = json.loads(r'''{{inputs.parameters.n}}''')
-        except: n = r'''{{inputs.parameters.n}}'''
+        try: num_data = json.loads(r'''{{inputs.parameters.num_data}}''')
+        except: num_data = r'''{{inputs.parameters.num_data}}'''
 
         import random
         import subprocess
@@ -71,7 +71,7 @@ spec:
         import pandas as pd
         from pyspark.sql import SparkSession
         spark = SparkSession.builder.master('local[1]').appName('my-spark-example-running-in-hera.com').getOrCreate()
-        (data, columns) = ([random.randint(0, n) for _ in range(n)], ['value'])
+        (data, columns) = ([random.randint(0, num_data) for _ in range(num_data)], ['value'])
         pandas_df = pd.DataFrame(data=data, columns=columns)
         start = time.time()
         pandas_result = pandas_df.describe()
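For context, the two findings this patch clears again and again are a missing `spec.entrypoint` and an unnamed container template, and both fall out of how the objects are declared in Hera. A minimal sketch of the corrected pattern (not part of the patch; the workflow name here is made up for illustration):

```python
from hera.workflows import Container, Workflow

# Passing entrypoint= to Workflow emits spec.entrypoint, and giving the
# Container a name becomes the template's name - the two fields the linter
# was flagging in the examples touched above.
with Workflow(generate_name="lint-demo-", entrypoint="whalesay") as w:
    Container(
        name="whalesay",
        image="docker/whalesay:latest",
        command=["cowsay"],
    )

# Render the manifest locally (needs the PyYAML extra); the output should
# now carry both fields when checked with argo lint.
print(w.to_yaml())
```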