Skip to content

Commit

Permalink
merge master
Browse files: browse the repository at this point in the history
  • Loading branch information
BO SANG committed Dec 26, 2024
1 parent f9d734b commit 5bbf7bb
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
6 changes: 5 additions & 1 deletion dlrover/python/elastic_agent/context.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,6 +11,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

from torch.distributed.elastic.agent.server.api import WorkerSpec

from dlrover.python.common.singleton import Singleton
from dlrover.python.diagnosis.common.diagnosis_action import (
DiagnosisAction,
Expand All @@ -21,7 +25,7 @@

class AgentContext(Singleton):
def __init__(self):
self._worker_spec = None
self._worker_spec: Optional[WorkerSpec] = None
self.remaining_failovers = 0
self.restart_count = 0
self._run_result = None
Expand Down
2 changes: 2 additions & 0 deletions dlrover/python/elastic_agent/torch/training.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1066,6 +1066,8 @@ def _check_and_process_diagnosis_action(self):
action.timestamp, datetime.now().timestamp()
)
expired_time_period = action.expired_time_period - time_diff
if expired_time_period < 0:
expired_time_period = 0
action.update_timestamp(
timestamp=datetime.now().timestamp(),
expired_time_period=expired_time_period,
Expand Down

0 comments on commit 5bbf7bb

Please sign in to comment.