Skip to content

Commit

Permalink
Fix runner not terminating on max duration exceeded
Browse files Browse the repository at this point in the history
Previously, when a job exceeded its max duration, the runner always stopped it instead of respecting the job's termination policy.
Since stopping is not supported by all backends, the instance could keep running.
  • Loading branch information
r4victor committed Aug 10, 2023
1 parent 16372f4 commit d69b047
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 11 deletions.
5 changes: 5 additions & 0 deletions runner/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,8 @@ const DELAY_READ_STATUS = 5 * time.Second

const REPO_HTTPS_URL = "https://%s/%s/%s.git"
const REPO_GIT_URL = "git@%s:%s/%s.git"

// Termination policy values. The executor compares a job's termination
// policy against these when the job's max duration is exceeded: the stop
// policy moves the job to the stopping state, any other value moves it to
// the terminating state.
const (
	// STOP_POLICY requests that the job be stopped.
	STOP_POLICY = "stop"
	// TERMINATE_POLICY requests that the job be terminated.
	TERMINATE_POLICY = "terminate"
)
22 changes: 11 additions & 11 deletions runner/internal/executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@ import (
"encoding/json"
"errors"
"fmt"
"github.com/docker/go-connections/nat"
"github.com/dstackai/dstack/runner/internal/gateway"
"io"
"os"
"path"
"path/filepath"
"strconv"
"time"

"github.com/docker/go-connections/nat"
"github.com/dstackai/dstack/runner/internal/gateway"

"github.com/dstackai/dstack/runner/internal/backend/base"

"github.com/dstackai/dstack/runner/internal/models"
Expand Down Expand Up @@ -143,6 +144,14 @@ func (ex *Executor) Run(ctx context.Context) error {
if err != nil {
return gerrors.Wrap(err)
}
if job.MaxDurationExceeded() {
log.Info(runCtx, "Job max duration exceeded")
if job.TerminationPolicy == consts.STOP_POLICY {
job.Status = states.Stopping
} else {
job.Status = states.Terminating
}
}
if job.Status == states.Stopping {
log.Info(runCtx, "Stopped")
ex.Stop(false)
Expand All @@ -160,15 +169,6 @@ func (ex *Executor) Run(ctx context.Context) error {
_ = ex.backend.UpdateState(runCtx)
return errRun
}
if job.MaxDurationExceeded() {
log.Info(runCtx, "Job max duration exceeded. Stopping...")
ex.Stop(false)
log.Info(runCtx, "Waiting job end")
errRun := <-erCh
job.Status = states.Stopped
_ = ex.backend.UpdateState(runCtx)
return errRun
}
case <-ctx.Done():
log.Info(runCtx, "Stopped")
ex.Stop(true)
Expand Down

0 comments on commit d69b047

Please sign in to comment.