Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 918 fix cli arguments for dstack pool add #919

Merged
merged 3 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.1
rev: v0.2.2
hooks:
- id: ruff
name: ruff common
Expand Down
44 changes: 32 additions & 12 deletions src/dstack/_internal/cli/commands/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@
)
from dstack._internal.core.models.pools import Instance, Pool
from dstack._internal.core.models.profiles import (
DEFAULT_TERMINATION_IDLE_TIME,
DEFAULT_POOL_TERMINATION_IDLE_TIME,
Profile,
SpotPolicy,
TerminationPolicy,
parse_max_duration,
)
from dstack._internal.core.models.resources import DEFAULT_CPU_COUNT, DEFAULT_MEMORY_SIZE
from dstack._internal.core.models.runs import InstanceStatus, Requirements
Expand Down Expand Up @@ -125,7 +126,10 @@ def _register(self) -> None:
add_parser.add_argument(
"--remote-port", help="Remote runner port", dest="remote_port", default=10999
)
add_parser.add_argument("--name", dest="instance_name", help="The name of the instance")
add_parser.add_argument(
"--name", dest="instance_name", help="Set the name of the instance"
)
add_parser.add_argument("--idle-duration", dest="idle_duration", help="Idle duration")
register_profile_args(add_parser)
register_resource_args(add_parser)
add_parser.set_defaults(subfunc=self._add)
Expand Down Expand Up @@ -239,20 +243,37 @@ def _add(self, args: argparse.Namespace) -> None:
shm_size=args.shared_memory,
disk=args.disk,
)
requirements = Requirements(
resources=resources,
max_price=args.max_price,
spot=(args.spot_policy == SpotPolicy.SPOT), # TODO(egor-s): None if SpotPolicy.AUTO
)

profile = load_profile(Path.cwd(), args.profile)
apply_profile_args(args, profile)
profile.pool_name = args.pool_name

termination_policy_idle = DEFAULT_TERMINATION_IDLE_TIME
termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE
profile.termination_idle_time = termination_policy_idle
profile.termination_policy = termination_policy
spot = None
if profile.spot_policy == SpotPolicy.SPOT:
spot = True
if profile.spot_policy == SpotPolicy.ONDEMAND:
spot = False

requirements = Requirements(
resources=resources,
max_price=args.max_price,
spot=spot,
)

idle_duration = parse_max_duration(args.idle_duration)
if idle_duration is None:
profile.termination_idle_time = DEFAULT_POOL_TERMINATION_IDLE_TIME
profile.termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE
elif idle_duration == "off":
profile.termination_idle_time = DEFAULT_POOL_TERMINATION_IDLE_TIME
profile.termination_policy = TerminationPolicy.DONT_DESTROY
elif isinstance(idle_duration, int):
profile.termination_idle_time = idle_duration
profile.termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE
else:
raise CLIError(
f"Invalid format --idle-duration {args.idle_duration!r}. It must be literal string 'off' or an integer number with an suffix s|m|h|d|w "
)

# Add remote instance
if args.remote:
Expand Down Expand Up @@ -369,7 +390,6 @@ def print_offers_table(
# else "no"
# )

# TODO: improve spot policy
if requirements.spot is None:
spot_policy = "auto"
elif requirements.spot:
Expand Down
4 changes: 2 additions & 2 deletions src/dstack/_internal/cli/commands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from dstack._internal.core.errors import CLIError, ConfigurationError, ServerClientError
from dstack._internal.core.models.configurations import ConfigurationType
from dstack._internal.core.models.profiles import (
DEFAULT_TERMINATION_IDLE_TIME,
DEFAULT_RUN_TERMINATION_IDLE_TIME,
CreationPolicy,
TerminationPolicy,
)
Expand Down Expand Up @@ -118,7 +118,7 @@ def _command(self, args: argparse.Namespace):
self._parser.print_help()
return

termination_policy_idle = DEFAULT_TERMINATION_IDLE_TIME
termination_policy_idle = DEFAULT_RUN_TERMINATION_IDLE_TIME
termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE

if args.idle_duration is not None:
Expand Down
8 changes: 5 additions & 3 deletions src/dstack/_internal/core/models/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@

DEFAULT_RETRY_LIMIT = 3600
DEFAULT_POOL_NAME = "default-pool"
DEFAULT_TERMINATION_IDLE_TIME = 5 * 60 # 5 minutes by default

DEFAULT_RUN_TERMINATION_IDLE_TIME = 5 * 60 # 5 minutes
DEFAULT_POOL_TERMINATION_IDLE_TIME = 72 * 60 * 60 # 3 days


class SpotPolicy(str, Enum):
Expand Down Expand Up @@ -49,7 +51,7 @@ def parse_duration(v: Optional[Union[int, str]]) -> Optional[int]:
return amount * multiplier


def parse_max_duration(v: Union[int, str]) -> int:
def parse_max_duration(v: Optional[Union[int, str]]) -> Optional[Union[str, int]]:
if v == "off":
return v
return parse_duration(v)
Expand Down Expand Up @@ -120,7 +122,7 @@ class Profile(ForbidExtra):
termination_idle_time: Annotated[
int,
Field(description="Seconds to wait before destroying the instance"),
] = DEFAULT_TERMINATION_IDLE_TIME
] = DEFAULT_RUN_TERMINATION_IDLE_TIME

_validate_max_duration = validator("max_duration", pre=True, allow_reuse=True)(
parse_max_duration
Expand Down
1 change: 0 additions & 1 deletion src/dstack/_internal/core/models/runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ class InstanceStatus(str, Enum):
BUSY = "busy"
TERMINATING = "terminating"
TERMINATED = "terminated"
FAILED = "failed"

@property
def finished_statuses(cls) -> Sequence["InstanceStatus"]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ async def check_shim(instance_id: UUID) -> None:

if instance.status in (InstanceStatus.READY, InstanceStatus.BUSY):
logger.warning(
"instance %s shim is not available, marked as failed", instance.name
"instance %s: shim has become unavailable, marked as failed", instance.name
)
FAIL_THRESHOLD = 10 * 6 * 20 # instance_healthcheck fails 20 minutes constantly
if instance.fail_count > FAIL_THRESHOLD:
Expand Down
7 changes: 5 additions & 2 deletions src/dstack/_internal/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
from sqlalchemy_utils import UUIDType

from dstack._internal.core.models.backends.base import BackendType
from dstack._internal.core.models.profiles import DEFAULT_TERMINATION_IDLE_TIME, TerminationPolicy
from dstack._internal.core.models.profiles import (
DEFAULT_POOL_TERMINATION_IDLE_TIME,
TerminationPolicy,
)
from dstack._internal.core.models.repos.base import RepoType
from dstack._internal.core.models.runs import InstanceStatus, JobErrorCode, JobStatus
from dstack._internal.core.models.users import GlobalRole, ProjectRole
Expand Down Expand Up @@ -289,7 +292,7 @@ class InstanceModel(BaseModel):
# termination policy
termination_policy: Mapped[Optional[TerminationPolicy]] = mapped_column(String(50))
termination_idle_time: Mapped[int] = mapped_column(
Integer, default=DEFAULT_TERMINATION_IDLE_TIME
Integer, default=DEFAULT_POOL_TERMINATION_IDLE_TIME
)

# connection fail handling
Expand Down
4 changes: 2 additions & 2 deletions src/dstack/_internal/server/testing/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dstack._internal.core.models.instances import InstanceType, Resources
from dstack._internal.core.models.profiles import (
DEFAULT_POOL_NAME,
DEFAULT_TERMINATION_IDLE_TIME,
DEFAULT_POOL_TERMINATION_IDLE_TIME,
Profile,
)
from dstack._internal.core.models.repos.base import RepoType
Expand Down Expand Up @@ -327,7 +327,7 @@ async def create_instance(
price=1,
region="eu-west",
backend=BackendType.DATACRUNCH,
termination_idle_time=DEFAULT_TERMINATION_IDLE_TIME,
termination_idle_time=DEFAULT_POOL_TERMINATION_IDLE_TIME,
)
session.add(im)
await session.commit()
Expand Down
4 changes: 2 additions & 2 deletions src/dstack/api/_public/runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from dstack._internal.core.models.instances import InstanceOfferWithAvailability, SSHKey
from dstack._internal.core.models.pools import Instance
from dstack._internal.core.models.profiles import (
DEFAULT_TERMINATION_IDLE_TIME,
DEFAULT_RUN_TERMINATION_IDLE_TIME,
CreationPolicy,
Profile,
ProfileRetryPolicy,
Expand Down Expand Up @@ -394,7 +394,7 @@ def get_plan(
instance_name: Optional[str] = None,
creation_policy: Optional[CreationPolicy] = None,
termination_policy: Optional[TerminationPolicy] = None,
termination_policy_idle: int = DEFAULT_TERMINATION_IDLE_TIME,
termination_policy_idle: int = DEFAULT_RUN_TERMINATION_IDLE_TIME,
) -> RunPlan:
# """
# Get run plan. Same arguments as `submit`
Expand Down
Loading