Merge pull request #62 from jpata/review_fixes

Review fixes
jpata · Feb 24, 2021 · d29b7e1 · d29b7e1
2 parents 7e36b59 + 5ccb94c
commit d29b7e1
Show file tree

Hide file tree

Showing 156 changed files with 843 additions and 35 deletions.
diff --git a/README.md b/README.md
@@ -1,13 +1,14 @@
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4452283.svg)](https://doi.org/10.5281/zenodo.4452283)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4559324.svg)](https://doi.org/10.5281/zenodo.4559324)
 [![CI](https://github.com/jpata/particleflow/workflows/CI/badge.svg)](https://github.com/jpata/particleflow/actions)
 
 <p float="left">
-  <img src="delphes/plots/event.png" alt="Simulated event" width="600"/>
+  <img src="notebooks/plots/event.png" alt="Simulated event" width="600"/>
 </p>
 
 <p float="left">
-  <img src="delphes/plots/num_particles.png" alt="Particle multiplicity" width="300"/>
-  <img src="delphes/plots/res_pid2.png" alt="Neutral hadron resolution" width="300"/>
+  <img src="notebooks/plots/num_particles.png" alt="Particle multiplicity" width="300"/>
+  <img src="notebooks/plots/res_pid2.png" alt="Neutral hadron resolution" width="300"/>
 </p>
 
 ## MLPF with Delphes

diff --git a/README_delphes.md b/README_delphes.md
@@ -12,6 +12,7 @@ singularity exec http://jpata.web.cern.ch/jpata/centos7hepsim.sif ./run_sim.sh
 # Run the ntuplization step
 # generate X,y input matrices for NN training in out/pythia8_ttbar/*.pkl.bz2
 singularity exec http://jpata.web.cern.ch/jpata/centos7hepsim.sif ./run_ntuple.sh
+singularity exec http://jpata.web.cern.ch/jpata/centos7hepsim.sif ./run_ntuple_qcd.sh
 
 #Alternatively, to skip run_sim.sh and run_ntuple.sh, download everything from https://doi.org/10.5281/zenodo.4452283 and put into out/pythia8_ttbar
 
@@ -23,22 +24,28 @@ mkdir val
 mkdir root
 mv *.root root/
 mb *.promc root/
-
-#these are held out for validation
-mv tev14_pythia8_ttbar_9_*.pkl.bz2 val/
 mv *.pkl.bz2 raw/
 cd ../..
 
+mv out/pythia8_qcd ../data/
+cd ../data/pythia8_qcd
+mkdir val
+mkdir root
+mv *.root root/
+mv *.promc root/
+mv *.pkl.bz2 val/
+cd ../..
+
 # Generate the TFRecord datasets needed for larger-than-RAM training
 singularity exec --nv http://jpata.web.cern.ch/jpata/base.simg python3 mlpf/launcher.py --action data --model-spec parameters/delphes-gnn-skipconn.yaml
 
 # Run the training of the base GNN model using e.g. 5 GPUs in a data-parallel mode
 CUDA_VISIBLE_DEVICES=0,1,2,3,4 singularity exec --nv http://jpata.web.cern.ch/jpata/base.simg python3 mlpf/launcher.py --action train --model-spec parameters/delphes-gnn-skipconn.yaml
 
 #Run the validation to produce the predictions file
-singularity exec --nv http://jpata.web.cern.ch/jpata/base.simg python3 mlpf/launcher.py --action eval --model-spec parameters/delphes-gnn-skipconn.yaml --weights ./experiments/delphes-gnn-skipconn-*/weights.300-*.hdf5
+singularity exec --nv http://jpata.web.cern.ch/jpata/base.simg python3 mlpf/launcher.py --action eval --model-spec parameters/delphes-gnn-skipconn.yaml --weights ./experiments/delphes-gnn-skipconn-*/weights-300-*.hdf5
 
-singularity exec --nv http://jpata.web.cern.ch/jpata/base.simg python3 mlpf/launcher.py --action time --model-spec parameters/delphes-gnn-skipconn.yaml --weights ./experiments/delphes-gnn-skipconn-*/weights.300-*.hdf5
+singularity exec --nv http://jpata.web.cern.ch/jpata/base.simg python3 mlpf/launcher.py --action time --model-spec parameters/delphes-gnn-skipconn.yaml --weights ./experiments/delphes-gnn-skipconn-*/weights-300-*.hdf5
 ```
 
 ## Recipe to prepare Delphes singularity image
@@ -50,4 +57,4 @@ sudo singularity build --sandbox centos7hepsim.sandbox centos7hepsim.img
 sudo singularity exec -B /home --writable centos7hepsim.sandbox ./install.sh
 sudo singularity build centos7hepsim.sif centos7hepsim.sandbox
 sudo rm -Rf centos7hepsim.sandbox
-```
+```
diff --git a/delphes/ntuplizer.py b/delphes/ntuplizer.py
@@ -13,7 +13,7 @@
 ROOT.gInterpreter.Declare('#include "classes/DelphesClasses.h"')
 
 #for debugging
-save_full_graphs = False
+save_full_graphs = True
 
 #0 - nothing associated
 #1 - charged hadron
@@ -138,30 +138,50 @@ def make_cand_array(cand_dict):
 
 #make (reco, gen, cand) triplets from tracks and towers
 #also return genparticles that were not associated to any reco object
-def make_triplets(g, tracks, towers, particles):
+def make_triplets(g, tracks, towers, particles, pfparticles):
     triplets = []
     remaining_particles = set(particles)
+    remaining_pfcandidates = set(pfparticles)
+
+    #loop over all reco tracks
     for t in tracks:
+
+        #for each track, find the associated GenParticle
         ptcl = None
-
         for e in g.edges(t):
             if e[1][0] == "particle":
                 ptcl = e[1]
                 break
 
+        #for each track, find the associated PFCandidate.
+        #The track does not store the PFCandidate links directly.
+        #Instead, we need to get the links to PFCandidates from the GenParticle found above.
+        #We should only look for charged PFCandidates,
+        #we assume the track makes only one genparticle, and the GenParticle makes only one charged PFCandidate
         pf_ptcl = None
         for e in g.edges(ptcl):
-            if e[1][0] in ["pfneutral", "pfcharged", "pfel", "pfphoton", "pfmu"]:
+            if e[1][0] in ["pfcharged", "pfel", "pfmu"] and e[1] in remaining_pfcandidates:
                 pf_ptcl = e[1]
                 break
+
         remaining_particles.remove(ptcl)
+
+        if pf_ptcl:
+            remaining_pfcandidates.remove(pf_ptcl)
+
         triplets.append((t, ptcl, pf_ptcl))
-
+
+    #now loop over all the reco calo towers
     for t in towers:
+
+        #get all the genparticles in the tower
         num_ptcls = 0
         ptcls, fracs = get_tower_gen_fracs(g, t)
+
+        #get the index of the highest energy deposit in the array (neutral hadron, charged hadron, photon, electron)
         imax = np.argmax(fracs)
 
+        #determine the PID based on which energy deposit is maximal
         charge = 0
         if len(ptcls) > 0:
             if imax==0:
@@ -175,7 +195,8 @@ def make_triplets(g, tracks, towers, particles):
             for ptcl in ptcls:
                 if ptcl in remaining_particles:
                     remaining_particles.remove(ptcl)
-
+
+        #add up the genparticles in the tower   
         lvs = []
         for ptcl in ptcls:
             lv = uproot_methods.TLorentzVector.from_ptetaphie(
@@ -189,6 +210,7 @@ def make_triplets(g, tracks, towers, particles):
         lv = None
         gen_ptcl = None
 
+        #determine the GenParticle to reconstruct from this tower
         if len(lvs) > 0:
             lv = sum(lvs[1:], lvs[0])
             gen_ptcl = {"pid": pid, "pt": lv.pt, "eta": lv.eta, "phi": lv.phi, "energy": lv.energy}
@@ -197,18 +219,24 @@ def make_triplets(g, tracks, towers, particles):
             if gen_ptcl["pid"] == 211 and abs(gen_ptcl["eta"]) > 2.5:
                 gen_ptcl["pid"] = 130
 
-            #we don't want to reconstruct neutral genparticles that have too low energy. the threshold is set according to the delphes PFCandidate energy distribution
+            #we don't want to reconstruct neutral genparticles that have too low energy.
+            #the threshold is set according to the delphes PFCandidate energy distribution
             if gen_ptcl["pid"] == 130 and gen_ptcl["energy"] < 9.0:
                 gen_ptcl = None
 
+        #find the PFCandidate matched to this tower.
+        #again, we need to loop over the GenParticles that are associated to the tower.
         pf_ptcl = None   
         for ptcl in ptcls:
             for e in g.edges(ptcl):
-                if e[1][0] in ["pfneutral", "pfcharged", "pfel", "pfphoton", "pfmu"]:
+                if e[1][0] in ["pfneutral", "pfcharged", "pfel", "pfphoton", "pfmu"] and e[1] in remaining_pfcandidates:
                     pf_ptcl = e[1]
                     break
+        if pf_ptcl:
+            remaining_pfcandidates.remove(pf_ptcl)
+
         triplets.append((t, gen_ptcl, pf_ptcl))
-    return triplets, list(remaining_particles)
+    return triplets, list(remaining_particles), list(remaining_pfcandidates)
 
 def process_chunk(infile, ev_start, ev_stop, outfile):
     f = ROOT.TFile.Open(infile)
@@ -272,6 +300,7 @@ def process_chunk(infile, ev_start, ev_stop, outfile):
             graph.nodes[node]["eta_outer"] = tracks[i].EtaOuter
             graph.nodes[node]["phi_outer"] = tracks[i].PhiOuter
             graph.nodes[node]["pt"] = tracks[i].PT
+            graph.nodes[node]["pid"] = tracks[i].PID
             graph.nodes[node]["charge"] = tracks[i].Charge
             ip = pileupmix_idxdict[tracks[i].Particle.GetObject()]
             graph.add_edge(("track", i), ("particle", ip))
@@ -336,16 +365,20 @@ def process_chunk(infile, ev_start, ev_stop, outfile):
 
         #write the full graph, mainly for study purposes
         # if iev<10 and save_full_graphs:
-        #     nx.readwrite.write_gpickle(graph, sys.argv[2].replace(".pkl","_graph_{}.pkl".format(iev)))
+        #     nx.readwrite.write_gpickle(graph, outfile.replace(".pkl.bz2","_graph_{}.pkl".format(iev)))
 
         #now clean up the graph, keeping only reconstructable genparticles
         #we also merge neutral genparticles within towers, as they are otherwise not reconstructable
         particles = [n for n in graph.nodes if n[0] == "particle"]
+        pfcand = [n for n in graph.nodes if n[0].startswith("pf")]
 
         tracks = [n for n in graph.nodes if n[0] == "track"]
         towers = [n for n in graph.nodes if n[0] == "tower"]
 
-        triplets, remaining_particles = make_triplets(graph, tracks, towers, particles)
+        triplets, remaining_particles, remaining_pfcandidates = make_triplets(graph, tracks, towers, particles, pfcand)
+        # print("remaining PF", len(remaining_pfcandidates))
+        # for pf in remaining_pfcandidates:
+        #     print(graph.nodes[pf])
 
         X = []
         ygen = []
@@ -392,7 +425,7 @@ def process_chunk(infile, ev_start, ev_stop, outfile):
         ycand_all.append(ycand)
 
     with bz2.BZ2File(outfile, "wb") as fi:
-        pickle.dump({"X": X_all, "ygen": ygen_all, "ygen_remaining": ygen_remaining_all, "ycand": ycand_all}, fi)
+        pickle.dump({"X": X_all, "ygen": ygen_all, "ycand": ycand_all}, fi)
 
 def process_chunk_args(args):
     process_chunk(*args)
@@ -409,7 +442,7 @@ def chunks(lst, n):
     f = ROOT.TFile.Open(infile)
     tree = f.Get("Delphes")
     num_evs = tree.GetEntries()
-    #num_evs = 5000
+    #num_evs = 1000
 
     arg_list = []
     ichunk = 0
@@ -421,5 +454,5 @@ def chunks(lst, n):
         ichunk += 1
 
     pool.map(process_chunk_args, arg_list)
-    #for arg in arg_list:
+    # for arg in arg_list:
     #    process_chunk_args(arg)
diff --git a/delphes/plots/event.pdf b/delphes/plots/event.pdf
diff --git a/delphes/plots/event.png b/delphes/plots/event.png
diff --git a/delphes/plots/num_particles.png b/delphes/plots/num_particles.png
diff --git a/delphes/plots/res_pid2.png b/delphes/plots/res_pid2.png
diff --git a/delphes/run_ntuple_qcd.sh b/delphes/run_ntuple_qcd.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+source /opt/hepsim.sh
+export LD_LIBRARY_PATH=/opt/hepsim/delphes:$LD_LIBRARY_PATH
+export ROOT_INCLUDE_PATH=/opt/hepsim/delphes:/opt/hepsim/delphes/external
+
+XDIR="out/pythia8_qcd"
+mkdir -p $XDIR
+#rm -f $XDIR/*.pkl
+
+for NUM in `seq 10 10`; do
+  INROOT="tev14_pythia8_qcd_$NUM.root"
+  OUTPKL="tev14_pythia8_qcd_$NUM.pkl.bz2"
+  python ntuplizer.py $XDIR/$INROOT $XDIR/$OUTPKL
+done
diff --git a/delphes/run_sim.sh b/delphes/run_sim.sh
@@ -13,5 +13,6 @@ mkdir -p $XDIR
 for i in `seq 0 9`; do
   nohup ./run_sim_seed.sh $i &
 done
+nohup ./run_sim_seed_qcd.sh 10 &
 
 wait
diff --git a/delphes/run_sim_seed_qcd.sh b/delphes/run_sim_seed_qcd.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+set +e
+
+NUM=$1
+
+XDIR="out/pythia8_qcd"
+mkdir -p $XDIR
+OUTROOT="tev14_pythia8_qcd_$NUM.root"
+OUT="tev14_pythia8_qcd_$NUM.promc"
+LOG="logfile_$NUM.txt"
+
+rm -f $XDIR/$OUTROOT $XDIR/$OUT
+
+source /opt/hepsim.sh
+cp tev14_pythia8_qcd.py tev14_pythia8_qcd.py.${NUM}
+echo "Random:seed=${NUM}" >> tev14_pythia8_.py.${NUM}
+./main.exe tev14_pythia8_qcd.py.${NUM} $XDIR/$OUT > $XDIR/$LOG 2>&1
+/opt/hepsim/delphes-local/DelphesProMC delphes_card_CMS_PileUp.tcl $XDIR/$OUTROOT $XDIR/$OUT >> $XDIR/$LOG 2>&1
diff --git a/delphes/tev14_pythia8_qcd.py b/delphes/tev14_pythia8_qcd.py
@@ -0,0 +1,57 @@
+# based on Pythia8_A14_NNPDF23LO_Common.py
+# and https://atlaswww.hep.anl.gov/hepsim/info.php?item=18
+# HepSim Pythia setting
+# J. Duarte and J. Pata
+# apply particle slim?
+ApplyParticleSlim=off
+#
+# Collision settings
+EventsNumber=5000
+Random:setSeed = on
+Random:seed = 0
+Beams:idA = 2212
+Beams:idB = 2212
+Beams:eCM = 14000.
+#physics processes
+HardQCD:all = on
+PhaseSpace:pTHatMin = 20
+# set top quark mass to CMS value of 172.5
+6:m0 = 172.5
+
+#
+#PDF:pSet = LHAPDF6:MSTW2008lo68cl.LHgrid
+PDF:pSet = LHAPDF6:NNPDF23_lo_as_0130_qed
+PDF:extrapolate = on
+
+Tune:ee = 7 
+Tune:pp = 14
+# PDF:useLHAPDF = on
+SpaceShower:rapidityOrder = on
+SigmaProcess:alphaSvalue = 0.140
+SpaceShower:pT0Ref = 1.56
+SpaceShower:pTmaxFudge = 0.91
+SpaceShower:pTdampFudge = 1.05
+SpaceShower:alphaSvalue = 0.127
+TimeShower:alphaSvalue = 0.127
+BeamRemnants:primordialKThard = 1.88
+MultipartonInteractions:pT0Ref = 2.09
+MultipartonInteractions:alphaSvalue = 0.126
+# BeamRemnants:reconnectRange  = 1.71
+
+#Pythia settings 
+#PhaseSpace:mHatMin = 100.
+#PhaseSpace:mHatMax = 10000
+#PhaseSpace:pTHatMin = 40
+#PhaseSpace:pTHatMax = 4000
+#set K_S, Lambda stable
+ParticleDecays:limitTau0 = on
+#Makes particles with c*tau>10 mm stable
+ParticleDecays:tau0Max = 10
+
+# fill high-pT tail and add weights to events
+#PhaseSpace:bias2Selection = on
+#PhaseSpace:bias2SelectionPow = 5.0
+
+# color reconnection
+ColourReconnection:reconnect=on
+ColourReconnection:range=1.71