Skip to content

Commit

Permalink
Issue 918 fix cli arguments for dstack pool add (#919)
Browse files Browse the repository at this point in the history
* Added --idle-duration to the `dstack pool add`

* Fix spotpolicy

* Fix constants
  • Loading branch information
Sergey Mezentsev authored Feb 21, 2024
1 parent 2b42dd8 commit 5e62c44
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.1
rev: v0.2.2
hooks:
- id: ruff
name: ruff common
Expand Down
44 changes: 32 additions & 12 deletions src/dstack/_internal/cli/commands/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@
)
from dstack._internal.core.models.pools import Instance, Pool
from dstack._internal.core.models.profiles import (
DEFAULT_TERMINATION_IDLE_TIME,
DEFAULT_POOL_TERMINATION_IDLE_TIME,
Profile,
SpotPolicy,
TerminationPolicy,
parse_max_duration,
)
from dstack._internal.core.models.resources import DEFAULT_CPU_COUNT, DEFAULT_MEMORY_SIZE
from dstack._internal.core.models.runs import InstanceStatus, Requirements
Expand Down Expand Up @@ -125,7 +126,10 @@ def _register(self) -> None:
add_parser.add_argument(
"--remote-port", help="Remote runner port", dest="remote_port", default=10999
)
add_parser.add_argument("--name", dest="instance_name", help="The name of the instance")
add_parser.add_argument(
"--name", dest="instance_name", help="Set the name of the instance"
)
add_parser.add_argument("--idle-duration", dest="idle_duration", help="Idle duration")
register_profile_args(add_parser)
register_resource_args(add_parser)
add_parser.set_defaults(subfunc=self._add)
Expand Down Expand Up @@ -239,20 +243,37 @@ def _add(self, args: argparse.Namespace) -> None:
shm_size=args.shared_memory,
disk=args.disk,
)
requirements = Requirements(
resources=resources,
max_price=args.max_price,
spot=(args.spot_policy == SpotPolicy.SPOT), # TODO(egor-s): None if SpotPolicy.AUTO
)

profile = load_profile(Path.cwd(), args.profile)
apply_profile_args(args, profile)
profile.pool_name = args.pool_name

termination_policy_idle = DEFAULT_TERMINATION_IDLE_TIME
termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE
profile.termination_idle_time = termination_policy_idle
profile.termination_policy = termination_policy
spot = None
if profile.spot_policy == SpotPolicy.SPOT:
spot = True
if profile.spot_policy == SpotPolicy.ONDEMAND:
spot = False

requirements = Requirements(
resources=resources,
max_price=args.max_price,
spot=spot,
)

idle_duration = parse_max_duration(args.idle_duration)
if idle_duration is None:
profile.termination_idle_time = DEFAULT_POOL_TERMINATION_IDLE_TIME
profile.termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE
elif idle_duration == "off":
profile.termination_idle_time = DEFAULT_POOL_TERMINATION_IDLE_TIME
profile.termination_policy = TerminationPolicy.DONT_DESTROY
elif isinstance(idle_duration, int):
profile.termination_idle_time = idle_duration
profile.termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE
else:
raise CLIError(
f"Invalid format --idle-duration {args.idle_duration!r}. It must be literal string 'off' or an integer number with an suffix s|m|h|d|w "
)

# Add remote instance
if args.remote:
Expand Down Expand Up @@ -369,7 +390,6 @@ def print_offers_table(
# else "no"
# )

# TODO: improve spot policy
if requirements.spot is None:
spot_policy = "auto"
elif requirements.spot:
Expand Down
4 changes: 2 additions & 2 deletions src/dstack/_internal/cli/commands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from dstack._internal.core.errors import CLIError, ConfigurationError, ServerClientError
from dstack._internal.core.models.configurations import ConfigurationType
from dstack._internal.core.models.profiles import (
DEFAULT_TERMINATION_IDLE_TIME,
DEFAULT_RUN_TERMINATION_IDLE_TIME,
CreationPolicy,
TerminationPolicy,
)
Expand Down Expand Up @@ -118,7 +118,7 @@ def _command(self, args: argparse.Namespace):
self._parser.print_help()
return

termination_policy_idle = DEFAULT_TERMINATION_IDLE_TIME
termination_policy_idle = DEFAULT_RUN_TERMINATION_IDLE_TIME
termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE

if args.idle_duration is not None:
Expand Down
8 changes: 5 additions & 3 deletions src/dstack/_internal/core/models/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@

DEFAULT_RETRY_LIMIT = 3600
DEFAULT_POOL_NAME = "default-pool"
DEFAULT_TERMINATION_IDLE_TIME = 5 * 60 # 5 minutes by default

DEFAULT_RUN_TERMINATION_IDLE_TIME = 5 * 60 # 5 minutes
DEFAULT_POOL_TERMINATION_IDLE_TIME = 72 * 60 * 60 # 3 days


class SpotPolicy(str, Enum):
Expand Down Expand Up @@ -49,7 +51,7 @@ def parse_duration(v: Optional[Union[int, str]]) -> Optional[int]:
return amount * multiplier


def parse_max_duration(v: Union[int, str]) -> int:
def parse_max_duration(v: Optional[Union[int, str]]) -> Optional[Union[str, int]]:
if v == "off":
return v
return parse_duration(v)
Expand Down Expand Up @@ -120,7 +122,7 @@ class Profile(ForbidExtra):
termination_idle_time: Annotated[
int,
Field(description="Seconds to wait before destroying the instance"),
] = DEFAULT_TERMINATION_IDLE_TIME
] = DEFAULT_RUN_TERMINATION_IDLE_TIME

_validate_max_duration = validator("max_duration", pre=True, allow_reuse=True)(
parse_max_duration
Expand Down
1 change: 0 additions & 1 deletion src/dstack/_internal/core/models/runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ class InstanceStatus(str, Enum):
BUSY = "busy"
TERMINATING = "terminating"
TERMINATED = "terminated"
FAILED = "failed"

@property
def finished_statuses(cls) -> Sequence["InstanceStatus"]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ async def check_shim(instance_id: UUID) -> None:

if instance.status in (InstanceStatus.READY, InstanceStatus.BUSY):
logger.warning(
"instance %s shim is not available, marked as failed", instance.name
"instance %s: shim has become unavailable, marked as failed", instance.name
)
FAIL_THRESHOLD = 10 * 6 * 20 # instance_healthcheck fails 20 minutes constantly
if instance.fail_count > FAIL_THRESHOLD:
Expand Down
7 changes: 5 additions & 2 deletions src/dstack/_internal/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
from sqlalchemy_utils import UUIDType

from dstack._internal.core.models.backends.base import BackendType
from dstack._internal.core.models.profiles import DEFAULT_TERMINATION_IDLE_TIME, TerminationPolicy
from dstack._internal.core.models.profiles import (
DEFAULT_POOL_TERMINATION_IDLE_TIME,
TerminationPolicy,
)
from dstack._internal.core.models.repos.base import RepoType
from dstack._internal.core.models.runs import InstanceStatus, JobErrorCode, JobStatus
from dstack._internal.core.models.users import GlobalRole, ProjectRole
Expand Down Expand Up @@ -289,7 +292,7 @@ class InstanceModel(BaseModel):
# termination policy
termination_policy: Mapped[Optional[TerminationPolicy]] = mapped_column(String(50))
termination_idle_time: Mapped[int] = mapped_column(
Integer, default=DEFAULT_TERMINATION_IDLE_TIME
Integer, default=DEFAULT_POOL_TERMINATION_IDLE_TIME
)

# connection fail handling
Expand Down
4 changes: 2 additions & 2 deletions src/dstack/_internal/server/testing/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dstack._internal.core.models.instances import InstanceType, Resources
from dstack._internal.core.models.profiles import (
DEFAULT_POOL_NAME,
DEFAULT_TERMINATION_IDLE_TIME,
DEFAULT_POOL_TERMINATION_IDLE_TIME,
Profile,
)
from dstack._internal.core.models.repos.base import RepoType
Expand Down Expand Up @@ -327,7 +327,7 @@ async def create_instance(
price=1,
region="eu-west",
backend=BackendType.DATACRUNCH,
termination_idle_time=DEFAULT_TERMINATION_IDLE_TIME,
termination_idle_time=DEFAULT_POOL_TERMINATION_IDLE_TIME,
)
session.add(im)
await session.commit()
Expand Down
4 changes: 2 additions & 2 deletions src/dstack/api/_public/runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from dstack._internal.core.models.instances import InstanceOfferWithAvailability, SSHKey
from dstack._internal.core.models.pools import Instance
from dstack._internal.core.models.profiles import (
DEFAULT_TERMINATION_IDLE_TIME,
DEFAULT_RUN_TERMINATION_IDLE_TIME,
CreationPolicy,
Profile,
ProfileRetryPolicy,
Expand Down Expand Up @@ -394,7 +394,7 @@ def get_plan(
instance_name: Optional[str] = None,
creation_policy: Optional[CreationPolicy] = None,
termination_policy: Optional[TerminationPolicy] = None,
termination_policy_idle: int = DEFAULT_TERMINATION_IDLE_TIME,
termination_policy_idle: int = DEFAULT_RUN_TERMINATION_IDLE_TIME,
) -> RunPlan:
# """
# Get run plan. Same arguments as `submit`
Expand Down

0 comments on commit 5e62c44

Please sign in to comment.