From d7fb348f1a1305b117970f40589f181a63f1aea0 Mon Sep 17 00:00:00 2001
From: Egor Sklyarov
Date: Tue, 15 Aug 2023 11:46:24 +0400
Subject: [PATCH 1/4] Add max price policy

---
 cli/dstack/_internal/backend/base/__init__.py |  5 ++++-
 .../_internal/configurators/__init__.py       |  8 ++++++++
 cli/dstack/_internal/core/job.py              | 21 ++++++++++++---------
 cli/dstack/_internal/core/profile.py          |  5 ++++-
 runner/internal/models/backend.go             | 13 +++++++------
 5 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/cli/dstack/_internal/backend/base/__init__.py b/cli/dstack/_internal/backend/base/__init__.py
index dd795b5e9..ce67523f5 100644
--- a/cli/dstack/_internal/backend/base/__init__.py
+++ b/cli/dstack/_internal/backend/base/__init__.py
@@ -521,4 +521,7 @@ def get_instance_candidates(
         ]
         instances = self.compute().get_supported_instances()
         instances = [i for i in instances if _matches_requirements(i.resources, requirements)]
-        return self.pricing().get_prices(instances, spot_query)
+        offers = self.pricing().get_prices(instances, spot_query)
+        if requirements.max_price is not None:
+            offers = [o for o in offers if o.price <= requirements.max_price]
+        return offers

diff --git a/cli/dstack/_internal/configurators/__init__.py b/cli/dstack/_internal/configurators/__init__.py
index 0e2aad894..1d09f1d6b 100644
--- a/cli/dstack/_internal/configurators/__init__.py
+++ b/cli/dstack/_internal/configurators/__init__.py
@@ -60,6 +60,10 @@ def get_parser(
             help="Request a GPU for the run",
         )
 
+        parser.add_argument(
+            "--max-price", metavar="PRICE", type=float, help="Maximum price per hour, $"
+        )
+
         spot_group = parser.add_mutually_exclusive_group()
         spot_group.add_argument(
             "--spot", action="store_const", dest="spot_policy", const=job.SpotPolicy.SPOT
@@ -102,6 +106,9 @@ def apply_args(self, args: argparse.Namespace):
             gpu.update(args.gpu)
             self.profile.resources.gpu = ProfileGPU.parse_obj(gpu)
 
+        if args.max_price is not None:
+            self.profile.resources.max_price = args.max_price
+
         if args.spot_policy is not None:
             self.profile.spot_policy = args.spot_policy
 
@@ -277,6 +284,7 @@ def requirements(self) -> job.Requirements:
             memory_mib=self.profile.resources.memory,
             gpus=None,
             shm_size_mib=self.profile.resources.shm_size,
+            max_price=self.profile.resources.max_price,
         )
         if self.profile.resources.gpu:
             r.gpus = job.GpusRequirements(

diff --git a/cli/dstack/_internal/core/job.py b/cli/dstack/_internal/core/job.py
index 7c084ed43..d41b583a4 100644
--- a/cli/dstack/_internal/core/job.py
+++ b/cli/dstack/_internal/core/job.py
@@ -32,18 +32,19 @@ class Gateway(BaseModel):
 
 
 class GpusRequirements(BaseModel):
-    count: Optional[int] = None
-    memory_mib: Optional[int] = None
-    name: Optional[str] = None
+    count: Optional[int]
+    memory_mib: Optional[int]
+    name: Optional[str]
 
 
 class Requirements(BaseModel):
-    cpus: Optional[int] = None
-    memory_mib: Optional[int] = None
-    gpus: Optional[GpusRequirements] = None
-    shm_size_mib: Optional[int] = None
-    spot: Optional[bool] = None
-    local: Optional[bool] = None
+    cpus: Optional[int]
+    memory_mib: Optional[int]
+    gpus: Optional[GpusRequirements]
+    shm_size_mib: Optional[int]
+    spot: Optional[bool]
+    local: Optional[bool]
+    max_price: Optional[float]
 
     def pretty_format(self):
         res = ""
@@ -53,6 +54,8 @@ def pretty_format(self):
             res += f", {self.gpus.count}x{self.gpus.name or 'GPU'}"
         if self.gpus.memory_mib:
             res += f" {self.gpus.memory_mib / 1024:g}GB"
+        if self.max_price is not None:
+            res += f" under ${self.max_price:g} per hour"
         return res
 
 
diff --git a/cli/dstack/_internal/core/profile.py b/cli/dstack/_internal/core/profile.py
index 722da8659..e7a48237d 100644
--- a/cli/dstack/_internal/core/profile.py
+++ b/cli/dstack/_internal/core/profile.py
@@ -1,7 +1,7 @@
 import re
 from typing import List, Optional, Union
 
-from pydantic import Field, validator
+from pydantic import Field, confloat, validator
 from typing_extensions import Annotated, Literal
 
 from dstack._internal.core.configuration import ForbidExtra
@@ -90,6 +90,9 @@ class ProfileResources(ForbidExtra):
         ),
     ]
     cpu: int = DEFAULT_CPU
+    max_price: Annotated[
+        Optional[confloat(gt=0.0)], Field(description="The maximum price per hour, $")
+    ]
     _validate_mem = validator("memory", "shm_size", pre=True, allow_reuse=True)(parse_memory)
 
     @validator("gpu", pre=True)

diff --git a/runner/internal/models/backend.go b/runner/internal/models/backend.go
index 90f72a153..2514b1f26 100644
--- a/runner/internal/models/backend.go
+++ b/runner/internal/models/backend.go
@@ -96,12 +96,13 @@ type App struct {
 }
 
 type Requirements struct {
-	GPUs    GPU   `yaml:"gpus,omitempty"`
-	CPUs    int   `yaml:"cpus,omitempty"`
-	Memory  int   `yaml:"memory_mib,omitempty"`
-	Spot    bool  `yaml:"spot,omitempty"`
-	ShmSize int64 `yaml:"shm_size_mib,omitempty"`
-	Local   bool  `json:"local"`
+	GPUs     GPU     `yaml:"gpus,omitempty"`
+	CPUs     int     `yaml:"cpus,omitempty"`
+	Memory   int     `yaml:"memory_mib,omitempty"`
+	Spot     bool    `yaml:"spot,omitempty"`
+	ShmSize  int64   `yaml:"shm_size_mib,omitempty"`
+	MaxPrice float64 `yaml:"max_price,omitempty"`
+	Local    bool    `json:"local"`
 }
 
 type GPU struct {

From cba721f280710d4b0a6368e1dfc12b2e42b749e6 Mon Sep 17 00:00:00 2001
From: Egor Sklyarov
Date: Tue, 15 Aug 2023 12:49:38 +0400
Subject: [PATCH 2/4] Update setup.py data glob

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 13ef27dda..bb2b4a155 100644
--- a/setup.py
+++ b/setup.py
@@ -107,7 +107,7 @@ def get_long_description():
     package_dir={"": "cli"},
     packages=find_packages("cli"),
     package_data={
-        "dstack._internal": ["schemas/*.json", "scripts/*.sh"],
+        "dstack._internal": ["schemas/*.json", "scripts/*.sh", "scripts/*.py"],
         "dstack._internal.hub": [
             "statics/*",
             "statics/**/*",

From 032a4d115c6e9e9a08bf19410dee4c4e40abea63 Mon Sep 17 00:00:00 2001
From: Egor Sklyarov
Date: Tue, 15 Aug 2023 12:55:43 +0400
Subject: [PATCH 3/4] Set AWS pricing client region to us-east-1

---
 cli/dstack/_internal/backend/aws/pricing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli/dstack/_internal/backend/aws/pricing.py b/cli/dstack/_internal/backend/aws/pricing.py
index c507c6ebe..8a41900f0 100644
--- a/cli/dstack/_internal/backend/aws/pricing.py
+++ b/cli/dstack/_internal/backend/aws/pricing.py
@@ -21,7 +21,7 @@ class AWSPricing(Pricing):
     def __init__(self, session: boto3.Session):
         super().__init__()
         self.session = session
-        self.pricing_client = self.session.client("pricing")
+        self.pricing_client = self.session.client("pricing", region_name="us-east-1")
 
     def _fetch_ondemand(self, attributes: Dict[str, str]):
         def get_ondemand_price(terms: dict) -> Dict[str, str]:

From 43dba2766af27c4e8f8955b1d5e2f9b721f15475 Mon Sep 17 00:00:00 2001
From: Egor Sklyarov
Date: Tue, 15 Aug 2023 13:02:51 +0400
Subject: [PATCH 4/4] Add max price in docs

---
 docs/docs/reference/cli/run.md      | 1 +
 docs/docs/reference/profiles.yml.md | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/docs/reference/cli/run.md b/docs/docs/reference/cli/run.md
index 77c88fbf7..8608ddd56 100644
--- a/docs/docs/reference/cli/run.md
+++ b/docs/docs/reference/cli/run.md
@@ -52,6 +52,7 @@ The following arguments are optional:
 - `-p PORT`, `--port PORT` – (Optional) Requests port or define mapping (`LOCAL_PORT:CONTAINER_PORT`)
 - `-e ENV`, `--env ENV` – (Optional) Set environment variable (`NAME=value`)
 - `--gpu` – (Optional) Request a GPU for the run. Specify any: name, count, memory (`NAME:COUNT:MEMORY` or `NAME` or `COUNT:MEMORY`, etc...)
+- `--max-price` – (Optional) Maximum price per hour, $
 - `ARGS` – (Optional) Use `ARGS` to pass custom run arguments
 
 Spot policy (the arguments are mutually exclusive):

diff --git a/docs/docs/reference/profiles.yml.md b/docs/docs/reference/profiles.yml.md
index 7bfeca32c..b17e0d552 100644
--- a/docs/docs/reference/profiles.yml.md
+++ b/docs/docs/reference/profiles.yml.md
@@ -14,9 +14,10 @@ Below is a full reference of all available properties.
   - `gpu` - (Optional) The minimum number of GPUs, their model name and memory
     - `name` - (Optional) The name of the GPU model (e.g., `"K80"`, `"V100"`, `"A100"`, etc)
     - `count` - (Optional) The minimum number of GPUs. Defaults to `1`.
-    - `memory` (Optional) The minimum size of GPU memory (e.g., `"16GB"`)
-  - `shm_size` (Optional) The size of shared memory (e.g., `"8GB"`). If you are using parallel communicating
+    - `memory` - (Optional) The minimum size of GPU memory (e.g., `"16GB"`)
+  - `shm_size` - (Optional) The size of shared memory (e.g., `"8GB"`). If you are using parallel communicating
     processes (e.g., dataloaders in PyTorch), you may need to configure this.
+  - `max_price` - (Optional) Maximum price per hour, $
 - `spot_policy` - (Optional) The policy for provisioning spot or on-demand instances: `spot`, `on-demand`, or `auto`. `spot` provisions a spot instance. `on-demand` provisions a on-demand instance. `auto` first tries to provision a spot instance and then tries on-demand if spot is not available. Defaults to `on-demand` for dev environments and to `auto` for tasks.
 - `retry_policy` - (Optional) The policy for re-submitting the run.
   - `retry` - (Optional) Whether to retry the run on failure or not. Default to `false`
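
Note on the behaviour introduced by PATCH 1/4: the offer filtering added to get_instance_candidates reduces to a simple per-hour price cut-off applied after pricing is resolved. The sketch below is a standalone illustration only; the Offer dataclass, the filter_by_max_price helper, and the example prices are hypothetical stand-ins rather than dstack's real pricing types, but the comparison matches the one the patch adds.

from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Offer:
    # Hypothetical stand-in for a priced instance offer; only `price` matters here.
    instance_name: str
    price: float  # $ per hour


def filter_by_max_price(offers: List[Offer], max_price: Optional[float]) -> List[Offer]:
    # Mirrors the patched logic: no max_price means no filtering; otherwise keep
    # only offers at or below the requested hourly price.
    if max_price is None:
        return offers
    return [o for o in offers if o.price <= max_price]


if __name__ == "__main__":
    # Illustrative prices, not quotes from any backend.
    offers = [Offer("p3.2xlarge", 3.06), Offer("g4dn.xlarge", 0.526)]
    print(filter_by_max_price(offers, max_price=1.0))
    # -> [Offer(instance_name='g4dn.xlarge', price=0.526)]

Per the docs changes in PATCH 4/4, the cap itself is supplied either as `--max-price` on the CLI or as `max_price` under `resources` in profiles.yml.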