From b86ff9fcc959d1157c063d91c130fed2584f6f4c Mon Sep 17 00:00:00 2001 From: Majid d Date: Sat, 19 Sep 2015 21:08:07 -0400 Subject: [PATCH 1/2] use psutil.pid_exists as a better, cross-platform way to check whether a local job process is alive --- spearmint/schedulers/local.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/spearmint/schedulers/local.py b/spearmint/schedulers/local.py index 04072d9..68ca972 100755 --- a/spearmint/schedulers/local.py +++ b/spearmint/schedulers/local.py @@ -187,6 +187,7 @@ import os import subprocess import sys +import psutil def init(*args, **kwargs): return LocalScheduler(*args, **kwargs) @@ -228,12 +229,7 @@ def submit(self, job_id, experiment_name, experiment_dir, database_address): def alive(self, process_id): - try: - # Send an alive signal to proc (note this could kill it in windows) - os.kill(process_id, 0) - except OSError: - # Job is no longer running. - return False - else: - return True + # todo: not correct to check pid b/c it could be taken by + # another program . + return psutil.pid_exists(process_id) From ccc503ae08798cb5ed9fd6090310de89b6d9b39f Mon Sep 17 00:00:00 2001 From: Majid d Date: Mon, 21 Sep 2015 20:36:30 -0400 Subject: [PATCH 2/2] make it resume when local jobs are interrupted; otherwise it gets stuck in a loop and doesn't execute --- spearmint/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spearmint/main.py b/spearmint/main.py index 052bea6..86a6a52 100755 --- a/spearmint/main.py +++ b/spearmint/main.py @@ -182,6 +182,7 @@ # to enter into this License and Terms of Use on behalf of itself and # its Institution. + import sys import optparse import importlib @@ -273,6 +274,7 @@ def main(): # Note: I chose to fill up one resource and them move on to the next # You could also do it the other way, by changing "while" to "if" here + remove_broken_jobs(db, jobs, experiment_name, resources) while resource.acceptingJobs(jobs): # Load jobs from DB