Skip to content

Commit

Permalink
bettering killing
Browse files Browse the repository at this point in the history
  • Loading branch information
kunyuan committed Feb 18, 2015
1 parent 972d7a0 commit 72fd037
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 96 deletions.
1 change: 1 addition & 0 deletions clear.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ then
rm *.hkl
rm *.txt
rm statis_total.hkl
rm _job*.sh
fi
fi
rm Message.txt
Expand Down
3 changes: 2 additions & 1 deletion dyson/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ def __init__(self, map, Lat, Hamiltonian, Anneal):
self.__Interaction=np.array(Hamiltonian["Interaction"])
self.__ExternalField=np.array(Hamiltonian["ExternalField"])
self.__DeltaField=np.array(Anneal["DeltaField"])
self.__Hopping=np.array(Hamiltonian["Hopping"])
self.__MaxTauBin=self.__Map.MaxTauBin
self.__Beta=self.__Map.Beta
if "Hopping" in Hamiltonian:
self.__Hopping=np.array(Hamiltonian["Hopping"])
if "ChemicalPotential" in Hamiltonian:
self.__Mu=np.array(Hamiltonian["ChemicalPotential"])
if "Description" in Hamiltonian:
Expand Down
76 changes: 24 additions & 52 deletions inlist.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,43 @@
'''This is the input file of all jobs.
You have to add new job objects to TO_DO list
if you want to run simulation.'''
CPU = 4
SLEEP = 1 #check job status for every SLEEP seconds
#common dictionary for all jobs
# monte carlo job definition
MonteCarlo={
"Control": {
"__Execute" : "./simulator.exe",
"__Duplicate" : 3,
"__IsCluster" : False,
"__AutoRun" : True,
"__KeepCPUBusy": True,
},
"Job": {
"DoesLoad" : False,
"Sample" : 100000000 ##0.8 min for 1000000(*1000) Samples in MC
}
"Job": {"Sample" : 100000000} ##0.8 min for 1000000(*1000) Samples in MC
}

Dyson={
"Control": {
"__Execute" : ["python", "./dyson/main.py"],
"__Duplicate" : 1,
"__IsCluster" : MonteCarlo["Control"]["__IsCluster"],
"__AutoRun" : True,
"__KeepCPUBusy": False,
"__AutoRun" : MonteCarlo["Control"]["__AutoRun"],
"__PBSCommand": "#PBS -l mem=5gb"
},
"Job": {
"DysonOnly": MonteCarlo["Control"]["__Duplicate"]==0
#"DysonOnly": False
}
}

<<<<<<< HEAD
Beta=4.0
Order=4
=======
Beta=2.0
Beta=1.5
Order=1
>>>>>>> 877f720202e931f7a6d27e5753d6ba8d8f15d647
Common={
"Tau": {
"MaxTauBin" : 64,
"Beta": Beta,
},
"Tau": {"MaxTauBin" : 64, "Beta": Beta},
"Lattice": {
#"Name": "Square",
#"NSublat": 1,
#"L": [16,16],
#"Name": "Honeycomb",
#"NSublat": 2,
#"L": [16,16],
#"Name": "Kagome",
#"NSublat": 3,
#"L": [16,16],
#"Name": "Cubic",
#"NSublat": 1,
#"L": [8,8,8],
"Name": "Pyrochlore",
"NSublat": 4,
"L": [8,8,8]
#"Name": "Checkboard",
#"NSublat": 2,
#2D lattice
#"Name": "Square", "NSublat": 1,
#"Name": "Checkboard", "NSublat": 2,
#"Name": "Honeycomb", "NSublat": 2,
#"Name": "Kagome", "NSublat": 3,
#"L": [8,8]
#"Name": "3DCheckerboard",
#"NSublat": 2,
#"L": [16,16,16]
#"L": [8,8,8]

#3D lattice
#"Name": "Cubic", "NSublat": 1,
#"Name": "3DCheckerboard", "NSublat": 2,
"Name": "Pyrochlore", "NSublat": 4,
"L": [8,8,8]
},
"Model": {
"Name": "J1J2",
Expand All @@ -79,10 +49,8 @@
}

MonteCarlo["Markov"]={
"Order": Order,
"Order": Order, "Sweep" : 10, "Toss" : 1000,
#Start from order 0, so that OrderReWeight has Order+1 elements
"Sweep" : 10,
"Toss" : 1000,
"OrderReWeight" : [100.0, 0.5, 1.0, 0.1, 0.05, 0.05, 0.01, 0.005],
"WormSpaceReweight" : 0.05,
"PolarReweight" : 2.0,
Expand All @@ -102,19 +70,23 @@
}

Dyson["Dyson"]={
"OrderAccepted": {"Sigma":1, "Polar":1},
"ErrorThreshold": 0.1,
#"SleepTime": 300,
"SleepTime": 40,
#"SleepTime": 300,
"OrderAccepted": {"Sigma":1, "Polar":1}, "ErrorThreshold": 0.2,
"Annealing": {
"DeltaField": [0.0, 0.0, 0.0, 0.0],
"Interval": [-0.1, -0.1, -0.1, -0.1]
}
}

import job_class as job
'''This is the input file of all jobs.
You have to add new job objects to TO_DO list
if you want to run simulation.'''
TO_DO = []
MonteCarlo.update(Common)
TO_DO.append(job.JobMonteCarlo(MonteCarlo))
Dyson.update(Common)
TO_DO.append(job.JobDyson(Dyson))
CPU = 4
SLEEP = 1 #check job status for every SLEEP seconds
17 changes: 7 additions & 10 deletions job_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

def get_current_PID(KeyWord):
workspace=os.path.abspath(".")
filelist=sorted([int(e.split('_')[0]) for e in os.listdir(workspace) if KeyWord in e])
filelist=sorted([int(e.split('_')[0]) for e in os.listdir(workspace) if (KeyWord in e) and e[0] is not '_'])
if len(filelist)==0:
NextPID=0
else:
Expand Down Expand Up @@ -46,25 +46,21 @@ def __check_parameters__(self, para):
return False

def __set_model_specific__(self):
PI=3.141592653589793238
if self.para["Model"]["Name"] in ("J1J2", "Heisenberg"):
self.para["Model"]["Hopping"]=[0.0,]
if self.para["Model"].has_key("Description") and "ImW" in self.para["Model"]["Description"]:
self.para["Model"]["ChemicalPotential"]=[0.0,0.0]
else:
mu=1j*PI/2.0/self.para["Tau"]["Beta"]
self.para["Model"]["ChemicalPotential"]=[mu,mu]
pass

class JobMonteCarlo(Job):
'''job subclass for monte carlo jobs'''
def __init__(self, para):
Job.__init__(self, para)
self.job["Type"] = "MC"
self.control["__KeepCPUBusy"]=True
#search folder for old jobs, the new pid=largest old pid+1
PIDList, NextPID=get_current_PID("statis")
if self.job["DoesLoad"] and len(PIDList) is not 0:
if len(PIDList) is not 0:
self.job["DoesLoad"]=True
self.pid=PIDList[:self.control["__Duplicate"]]
else:
self.job["DoesLoad"]=False
self.pid=range(NextPID, NextPID+self.control["__Duplicate"])

def __check_parameters__(self, para):
Expand Down Expand Up @@ -92,6 +88,7 @@ class JobDyson(Job):
def __init__(self, para):
Job.__init__(self, para)
self.job["Type"] = "DYSON"
self.control["__KeepCPUBusy"]=False
#PIDList, NextPID=get_current_PID("Weight")
if self.control["__Duplicate"]>0:
self.pid=range(1)
Expand Down
39 changes: 19 additions & 20 deletions job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@

PROCLIST = []
PROCLIST_BACK = []
workdir="."
workdir=os.path.abspath(".")
logging.basicConfig(filename=workdir+"/project.log",
level=logging.INFO,
format="\n[job.daemon][%(asctime)s][%(levelname)s]:\n%(message)s",
datefmt='%y/%m/%d %H:%M:%S')

INFILEPATH = os.path.join(os.path.abspath(workdir),"infile")
OUTFILEPATH = os.path.join(os.path.abspath(workdir),"outfile")
INFILEPATH = os.path.join(workdir,"infile")
OUTFILEPATH = os.path.join(workdir,"outfile")

class JobAtom():
'''atom class of all jobs'''
Expand All @@ -34,6 +34,8 @@ def __init__(self, control, pid, para):
self.is_cluster=control["__IsCluster"]
self.auto_run=control["__AutoRun"]
self.keep_cpu_busy=control["__KeepCPUBusy"]
if control.has_key("__PBSCommand"):
self.pbs_command=control["__PBSCommand"]
self.para = para
return

Expand Down Expand Up @@ -82,23 +84,23 @@ def submit_job(job_atom):
os.system("mkdir "+OUTFILEPATH)

homedir = os.getcwd()
jobname = homedir.split("/")[-1]+"."+job_atom.name
_, tail = os.path.split(homedir)
jobname = tail+"."+job_atom.name

infile = INFILEPATH+"/_in_{0}_{1}".format(job_atom.name, job_atom.pid)
outfile = OUTFILEPATH+"/out_{0}_{1}.txt".format(
job_atom.name, job_atom.pid)
jobfile = os.path.abspath(workdir+"/_job_{0}_{1}.sh".format(
job_atom.name, job_atom.pid))
infile = os.path.join(INFILEPATH,"_in_{0}_{1}".format(job_atom.name, job_atom.pid))
outfile = os.path.join(OUTFILEPATH,"out_{0}_{1}.txt".format(job_atom.name, job_atom.pid))
jobfile = os.path.join(workdir,"_job_{0}_{1}.sh".format(job_atom.name, job_atom.pid))
IO.SaveDict(infile, "w", job_atom.para)
if job_atom.is_cluster:
fjob = open(jobfile, "w")
fjob.write("#!/bin/sh\n"+"#PBS -N "+jobname+"\n")
fjob.write("#PBS -o "+homedir+"/Output\n")
fjob.write("#PBS -e "+homedir+"/Error\n")
fjob.write("echo $PBS_JOBID >>"+homedir+"/id_job.log\n")
fjob.write("cd "+homedir+"\n")
fjob.write(job_atom.execute+" -f "+infile)
fjob.close()
with open(jobfile, "w") as fjob:
fjob.write("#!/bin/sh\n"+"#PBS -N "+jobname+"\n")
if hasattr(job_atom, "pbs_command"):
fjob.write(job_atom.pbs_command+"\n")
fjob.write("#PBS -o "+homedir+"/Output\n")
fjob.write("#PBS -e "+homedir+"/Error\n")
fjob.write("echo $PBS_JOBID >>"+homedir+"/id_job.log\n")
fjob.write("cd "+homedir+"\n")
fjob.write(job_atom.execute+" -f "+infile)
if job_atom.auto_run:
os.system("qsub "+jobfile)
os.system("rm "+jobfile)
Expand All @@ -115,9 +117,6 @@ def submit_job(job_atom):
else:
PROCLIST_BACK.append((proc, job_atom))

#print shellstr
#print PROCLIST
#print PROCLIST_BACK
logging.info(job_atom.get_job_name()+" is started...")
logging.info("input:\n"+str(job_atom.para))
logging.info("PID:{0}\n".format(proc.pid))
Expand Down
35 changes: 22 additions & 13 deletions killthemall.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,28 @@
#!/usr/bin/env python
import os
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--force", action='store_true', help="kill jobs with qdel command")
parser.add_argument('-r', '--range', nargs=2, required=False, type=int, help="--range start end: kill [start, end) jobs")
args = parser.parse_args()
from multiprocessing import Pool

if args.range is not None:
lines=range(args.range[0],args.range[1])
else:
filename="id_job.log"
lines = [line.rstrip('\n') for line in open(filename)]
for jobid in lines:
print "Killing job {0}".format(jobid)
def delete(jobid):
    """Announce and remove one batch job with ``qdel``.

    ``jobid`` is formatted into both the progress message and the shell
    command, so any value accepted by ``str.format`` works.  The exit
    status of ``qdel`` is not inspected and nothing is returned.
    """
    print("Deleting job {0}".format(jobid))
    qdel_command = "qdel {0}".format(jobid)
    os.system(qdel_command)
def terminate(jobid):
    """Announce and send SIGTERM to one batch job with ``qsig``.

    ``jobid`` is formatted into both the progress message and the shell
    command, so any value accepted by ``str.format`` works.  The exit
    status of ``qsig`` is not inspected and nothing is returned.
    """
    print("Terminating job {0}".format(jobid))
    qsig_command = "qsig -s SIGTERM {0}".format(jobid)
    os.system(qsig_command)

if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--force", action='store_true', help="kill jobs with qdel command")
parser.add_argument('-r', '--range', nargs=2, required=False, type=int, help="--range start end: kill [start, end) jobs")
args = parser.parse_args()

if args.range is not None:
lines=range(args.range[0],args.range[1])
else:
filename="id_job.log"
lines = [line.rstrip('\n') for line in open(filename)]
p = Pool(16)
if args.force:
os.system("qdel {0}".format(jobid))
p.map(delete, lines)
else:
os.system("qsig -s SIGTERM {0}".format(jobid))
p.map(terminate, lines)

0 comments on commit 72fd037

Please sign in to comment.