diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py index e72343f34f..266b2c319a 100644 --- a/machine_learning_hep/analysis/analyzer_jets.py +++ b/machine_learning_hep/analysis/analyzer_jets.py @@ -404,6 +404,9 @@ def _fit_mass(self, hist, filename = None): # pylint: disable=too-many-branches,too-many-statements def fit(self): + if not self.cfg('hfjet', True): + self.logger.info("Not fitting mass distributions for inclusive jets") + return self.logger.info("Fitting inclusive mass distributions") gStyle.SetOptFit(1111) for level in self.fit_levels: @@ -738,7 +741,9 @@ def _analyze(self, method = 'sidesub'): # Signal extraction self.logger.info("Signal extraction (method %s): obs. %s, %s, ipt %d", method, var, mcordata, ipt) - if method == 'sidesub': + if not self.cfg('hfjet', True): + h = project_hist(h_in, axes_proj[1:], {}) + elif method == 'sidesub': h = self._subtract_sideband(h_in, var, mcordata, ipt) elif method == 'sigextr': h = self._extract_signal(h_in, var, mcordata, ipt) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_JPsiJet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_JPsiJet_pp.yml new file mode 100644 index 0000000000..4d4a47142a --- /dev/null +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_JPsiJet_pp.yml @@ -0,0 +1,915 @@ +--- +# © Copyright CERN 2018. All rights not expressly granted are reserved. # +# Author: Gian.Michele.Innocenti@cern.ch # +# This program is free software: you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the # +# Free Software Foundation, either version 3 of the License, or (at your # +# option) any later version. This program is distributed in the hope that # +# it will be useful, but WITHOUT ANY WARRANTY; without even the implied # +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # +# See the GNU General Public License for more details. # +# You should have received a copy of the GNU General Public License # +# along with this program. if not, see . # + +LcJet_pp: + doml: true + mass: 2.28646 + sel_reco_unp: "fPt>0" + sel_gen_unp: "fPt>0" + sel_cen_unp: null + sel_good_evt_unp: null # "fIsEventReject == 0" + # sel_reco_skim: ["mlPromptScore > 0.96", "mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.6", null] # (sel_skim_binmin bins) + sel_reco_skim: [null, null, null, null, null, null, null] # (sel_skim_binmin bins) + sel_gen_skim: [null, null, null, null, null, null, null] # (sel_skim_binmin bins) + sel_skim_binmin: [1, 2, 4, 6, 8, 12, 24] # skimming pt bins (sel_skim_binmin bins) + sel_skim_binmax: [2, 4, 6, 8, 12, 24, 48] # skimming pt bins (sel_skim_binmin bins) + var_binning: fPt + dofullevtmerge: false + var_cand: fCandidateSelFlag + # var_swap: fIsCandidateSwapped + bitmap_sel: + var_name: fFlagMcMatchRec + var_name_gen: fFlagMcMatchGen + var_name_origgen: fOriginMcGen + var_name_origrec: fOriginMcRec + var_isstd: isstd + var_ismcsignal: ismcsignal + var_ismcprompt: ismcprompt + var_ismcfd: ismcfd + var_ismcbkg: ismcbkg + var_ismcrefl: ismcrefl + isstd: [[1], []] + ismcsignal: [[1], []] + ismcprompt: [[0], []] + ismcfd: [[1], []] + ismcbkg: [[], [1]] + ismcrefl: [[1], [1]] # probably missing from tree creator + + #region dfs + dfs: + read: + # evtorig: + # level: all + # index: fIndexHf3PCollBases + # trees: + # O2hf3pcollbase: [fNumContrib] + # extra: + # fIsEventReject: 0 + collcnt: + trees: + O2collcount: + [ + fReadCounts, + fReadCountsWithTVX, + fReadCountsWithTVXAndZVertexAndSel8, + fReadCountsWithTVXAndZVertexAndSelMC, + ] + bccnt: + trees: + O2bccount: + [ + fReadCountsWithTVX, + fReadCountsWithTVXAndNoTFB, + fReadCountsWithTVXAndNoTFBAndNoITSROFB, + ] + + # collgen: + # level: gen + # index: fIndexLcCMCPJetCOs + # trees: + # O2lccmcpjetco: [fPosZ, fCentrality, fEventSel] + jetgen: + level: gen + index: fIndexLCCMCPJETOS + trees: + O2hf3ppbase: + [fPt, fY, fEta, fPhi, fFlagMcMatchGen, fOriginMcGen] + O2lccmcpjeto: + [ + fIndexLCCMCPJETCOS, + fIndexHF3PPBASES_0, + fJetPt, + fJetPhi, + fJetEta, + fJetNConstituents, + fJetR, + ] + O2lccmcpjetmo: + [ + fIndexArrayLCCMCDJETOS_hf, + fIndexArrayLCCMCDJETOS_geo, + fIndexArrayLCCMCDJETOS_pt, + ] + O2lccmcpjetsso: + [ + fEnergyMother, + fPtLeading, + fPtSubLeading, + fTheta, + fNSub2DR, + fNSub1, + fNSub2, + ] + tags: + isstd: { var: fFlagMcMatchGen, req: [[1], []] } + ismcsignal: + { var: fFlagMcMatchGen, req: [[1], []], abs: true } + ismcbkg: { var: fFlagMcMatchGen, req: [[], [1]], abs: true } + ismcprompt: { var: fOriginMcGen, req: [[0], []] } + ismcfd: { var: fOriginMcGen, req: [[1], []] } + filter: "fPt >= 1. and abs(fY) <= 0.8 and abs(fJetEta) < (.9 - (fJetR / 100.))" # TODO: check jet eta cut + + colldet: + level: det + index: fIndexLCCMCDJETCOS + trees: + O2lccmcdjetco: [fPosZ, fCentrality, fEventSel] + jetdet: + level: det + index: fIndexLCCMCDJETOS + trees: + O2hf3pbase: [fIndexHf3PCollBases, fPt, fY, fEta, fPhi, fM] + O2hf3pmc: [fFlagMcMatchRec, fOriginMcRec] + O2hf3ppar: + [ + fCpa, + fCpaXY, + fChi2PCA, + fDecayLength, + fDecayLengthXY, + fDecayLengthNormalised, + fDecayLengthXYNormalised, + fImpactParameter0, + fImpactParameter1, + fImpactParameter2, + fImpactParameterNormalised0, + fImpactParameterNormalised1, + fImpactParameterNormalised2, + fPtProng0, + fPtProng1, + fPtProng2, + ] + O2hf3ppare: + [ + fErrorDecayLength, + fErrorDecayLengthXY, + fErrorImpactParameter0, + fErrorImpactParameter1, + ] + O2hf3psel: [fCandidateSelFlag] + O2hf3pml: [fMlScores] + O2lccmcdjeto: + [ + fIndexLCCMCDJETCOS, + fIndexHF3PBASES_0, + fJetPt, + fJetPhi, + fJetEta, + fJetNConstituents, + fJetR, + ] + O2lccmcdjetmo: + [ + fIndexArrayLCCMCPJETOS_hf, + fIndexArrayLCCMCPJETOS_geo, + fIndexArrayLCCMCPJETOS_pt, + ] + O2lccmcdjetsso: + [ + fEnergyMother, + fPtLeading, + fPtSubLeading, + fTheta, + fNSub2DR, + fNSub1, + fNSub2, + ] + tags: + isstd: { var: fFlagMcMatchRec, req: [[1], []] } + ismcsignal: + { var: fFlagMcMatchRec, req: [[1], []], abs: true } + ismcbkg: { var: fFlagMcMatchRec, req: [[], [1]], abs: true } + ismcprompt: { var: fOriginMcRec, req: [[0], []] } + ismcfd: { var: fOriginMcRec, req: [[1], []] } + extract_component: + - { var: fMlScores, newvar: mlPromptScore, component: 1 } + filter: "fPt >= 1. and abs(fY) <= 0.8 and abs(fJetEta) < (.9 - (fJetR / 100.))" # TODO: check jet eta cut + # swap: {cand: fCandidateSelFlag, var_swap: fIsCandidateSwapped, vars: [ismcsignal, ismcprompt, icmcfd]} + + colldata: + level: data + index: fIndexDIELCJETCOS + trees: + O2dielcjetco: [fPosZ, fCentrality, fEventSel] + jetdata: + level: data + index: fIndexDIELCJETOS + trees: + O2dielcjeto: + [ + fIndexDIELCJETCOS, + fIndexRTDIELECTRONS_0, + fJetPt, + fJetPhi, + fJetEta, + fJetNConstituents, + fJetR, + ] + O2dielcjetsso: + [ + fIndexDIELCJETOS, + fEnergyMother, + fPtLeading, + fPtSubLeading, + fTheta, + fNSub2DR, + fNSub1, + fNSub2, + ] + O2rtdielectron: [fPt, fEta, fPhi, fMass] + filter: "abs(fJetEta) < (.9 - (fJetR / 100.))" # TODO: check jet eta cut + + merge: + # - {base: jetgen, ref: collgen} + - { base: jetdet, ref: colldet } + - { base: jetdata, ref: colldata } + + write: + jetgen: + level: gen + file: AnalysisResultsGen.parquet + jetdet: + level: det + file: AnalysisResultsReco.parquet + jetdata: + level: data + file: AnalysisResultsReco.parquet + # evtorig: + # level: all + # file: AnalysisResultsEvtOrig.parquet + # evt: + # level: all + # source: evtorig + # file: AnalysisResultsEvt.parquet + # filter: "fIsEventReject == 0" + collcnt: + level: all + file: AnalysisResultsCollCnt.parquet + bccnt: + level: all + file: AnalysisResultsBcCnt.parquet + + variables: + var_all: + [ + fIndexCollisions, + fFlagMcMatchRec, + fCandidateSelFlag, + fOriginMcRec, + fIsCandidateSwapped, + fNProngsContributorsPV, + fY, + fEta, + fPt, + fCpa, + fCpaXY, + fM, + fChi2PCA, + fDecayLength, + fDecayLengthXY, + fPtProng0, + fPtProng1, + fPtProng2, + fImpactParameter0, + fImpactParameter1, + fImpactParameter2, + fNSigTpcPi0, + fNSigTpcPr0, + fNSigTpcKa1, + fNSigTpcPi2, + fNSigTpcPr2, + fNSigTofPi0, + fNSigTofPr0, + fNSigTofKa1, + fNSigTofPi2, + fNSigTofPr2, + fNSigTpcTofPi0, + fNSigTpcTofPr0, + fNSigTpcTofKa1, + fNSigTpcTofPi2, + fNSigTpcTofPr2, + ] + var_training: + [ + [ + fImpactParameter0, + fImpactParameter1, + fImpactParameter2, + fCpa, + fChi2PCA, + fDecayLength, + fDecayLengthXY, + fNSigTpcTofPi0, + fNSigTpcTofPr0, + fNSigTpcTofKa1, + fNSigTpcTofPi2, + fNSigTpcTofPr2, + ], + [ + fImpactParameter0, + fImpactParameter1, + fImpactParameter2, + fCpa, + fChi2PCA, + fDecayLength, + fDecayLengthXY, + fNSigTpcTofPi0, + fNSigTpcTofPr0, + fNSigTpcTofKa1, + fNSigTpcTofPi2, + fNSigTpcTofPr2, + ], + [ + fImpactParameter0, + fImpactParameter1, + fImpactParameter2, + fCpa, + fChi2PCA, + fDecayLength, + fDecayLengthXY, + fNSigTpcTofPi0, + fNSigTpcTofPr0, + fNSigTpcTofKa1, + fNSigTpcTofPi2, + fNSigTpcTofPr2, + ], + [ + fImpactParameter0, + fImpactParameter1, + fImpactParameter2, + fCpa, + fChi2PCA, + fDecayLength, + fDecayLengthXY, + fNSigTpcTofPi0, + fNSigTpcTofPr0, + fNSigTpcTofKa1, + fNSigTpcTofPi2, + fNSigTpcTofPr2, + ], + [ + fImpactParameter0, + fImpactParameter1, + fImpactParameter2, + fCpa, + fChi2PCA, + fDecayLength, + fDecayLengthXY, + fNSigTpcTofPi0, + fNSigTpcTofPr0, + fNSigTpcTofKa1, + fNSigTpcTofPi2, + fNSigTpcTofPr2, + ], + [ + fImpactParameter0, + fImpactParameter1, + fImpactParameter2, + fCpa, + fChi2PCA, + fDecayLength, + fDecayLengthXY, + fNSigTpcTofPi0, + fNSigTpcTofPr0, + fNSigTpcTofKa1, + fNSigTpcTofPi2, + fNSigTpcTofPr2, + ], + ] + # sel_skim_binmin bins + var_boundaries: [fCosThetaStar, fPtProng] + var_correlation: + - [fCosThetaStar] # TODO: update + - [fPtProng0] + var_signal: signal + var_class: class + var_inv_mass: fM + var_y: fY + var_evt_sel: fIsEventReject + var_cuts: + - [fPtProng0, lt, null] + - [fPtProng1, lt, null] + + plot_options: + prob_cut_scan: + fPtProng0: + xlim: + - 0 + - 1 + fPtProng1: + xlim: + - 0 + - 1 + eff_cut_scan: + fPtProng0: + xlim: + - 0 + - 1 + fPtProng1: + xlim: + - 0 + - 1 + + files_names: + namefile_unmerged_tree: AO2D.root + namefile_reco: AnalysisResultsReco.parquet + namefile_evt: AnalysisResultsEvt.parquet + namefile_collcnt: AnalysisResultsCollCnt.parquet + namefile_bccnt: AnalysisResultsBcCnt.parquet + namefile_evtvalroot: AnalysisResultsROOTEvtVal.root + namefile_evtorig: AnalysisResultsEvtOrig.parquet + namefile_gen: AnalysisResultsGen.parquet + namefile_reco_applieddata: AnalysisResultsRecoAppliedData.parquet + namefile_reco_appliedmc: AnalysisResultsRecoAppliedMC.parquet + namefile_mcweights: mcweights.root + treeoutput: "Lctree" + histofilename: "masshisto.root" + efffilename: "effhisto.root" + respfilename: "resphisto.root" + crossfilename: "cross_section_tot.root" + resultfilename: "results.root" + + #region multi + multi: + data: + nprocessesparallel: 80 + maxfiles: [-1] #list of periods + chunksizeunp: [100] #list of periods + chunksizeskim: [100] #list of periods + fracmerge: [1.] #list of periods + seedmerge: [12] #list of periods + period: [LHC22o] #list of periods + select_period: [1] + prefix_dir: /data2/MLhep/real/train_300133/ + unmerged_tree_dir: [alice] #list of periods + pkl: ["${USER}/lcjet/pkl"] #list of periods + pkl_skimmed: ["${USER}/lcjet/pklsk"] #list of periods + pkl_skimmed_merge_for_ml: ["${USER}/lcjet/pklskml"] #list of periods + pkl_skimmed_merge_for_ml_all: "${USER}/lcjet/pp_data_mltot" + pkl_evtcounter_all: "${USER}/lcjet/pp_data_evttot" + mcreweights: [../Analyses] #list of periods + mc: + nprocessesparallel: 80 + maxfiles: [-1] #list of periods + chunksizeunp: [100] #list of periods + chunksizeskim: [1000] #list of periods + fracmerge: [1.] #list of periods + seedmerge: [12] #list of periods + period: [LHC24d3b] #list of periods + select_period: [1] + prefix_dir: /data2/MLhep/sim/train_257383/ + unmerged_tree_dir: [alice] + pkl: ["${USER}/lcjet/pkl"] #list of periods + pkl_skimmed: ["${USER}/lcjet/pklsk"] #list of periods + pkl_skimmed_merge_for_ml: ["${USER}/lcjet/pklskml"] #list of periods + pkl_skimmed_merge_for_ml_all: "${USER}/lcjet/pp_mc_prod_mltot" + pkl_evtcounter_all: "${USER}/lcjet/pp_mc_prod_evttot" + mcreweights: [../Analyses] #list of periods + + ml: + evtsel: fIsEventReject == 0 + triggersel: + data: null + mc: null + + nbkg: 500000 + nsig: 500000 + mult_bkg: [1, 1, 1, 1, 1, 1, 1] + nclasses: [20000, 20000] + sampletags: [0, 1] + equalise_sig_bkg: True + # sampletagforsignal: 1 + # sampletagforbkg: 0 + sel_ml: [fM < 2.22 or fM > 2.35, ismcsignal == 1 and ismcprompt == 1] + sel_bkg: fM < 2.22 or fM > 2.35 + class_labels: [bkg, sig] + nkfolds: 5 + rnd_shuffle: 12 + rnd_splt: 12 + rnd_all: 12 + test_frac: 0.2 + binmin: [1, 2, 4, 6, 8, 12, 24] # must be equal to sel_skim_binmin (sel_skim_binmin bins) + binmax: [2, 4, 6, 8, 12, 24, 36] # must be equal to sel_skim_binmax (sel_skim_binmin bins) + mltype: BinaryClassification + ncorescrossval: 10 + prefix_dir_ml: "/data2/${USER}/MLhep/" + mlplot: mlplot + mlout: mlout + + opt: + isFONLLfromROOT: true + filename_fonll: "data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root" # file with FONLL predictions + fonll_particle: "hLcpkpipred" + fonll_pred: "max" # edge of the FONLL prediction + FF: 0.204 # fragmentation fraction + sigma_MB: 57.8e-3 # Minimum Bias cross section (pp) 50.87e-3 [b], 1 for Pb-Pb + Taa: 1 # 23260 [b^-1] in 0-10% Pb-Pb, 3917 [b^-1] in 30-50% Pb-Pb, 1 for pp + BR: 6.24e-2 # branching ratio of the decay Lc -> p K- pi+ + f_prompt: 0.9 # estimated fraction of prompt candidates + bkg_data_fraction: 0.05 # fraction of real data used in the estimation + num_steps: 111 # number of steps used in efficiency and signif. estimation + bkg_function: pol2 # fit function for bkg (among TH1 predefined fit functions, e.g. expo, pol1, pol2, ...) + save_fit: True # save bkg fits with the various cuts on ML output + raahp: [1, 1, 1, 1, 1, 1, 1] # sel_skim_binmin bins + presel_gen_eff: "fPt > 0. and abs(fY) < 0.8" + + mlapplication: + data: + prefix_dir_app: "/data2/${USER}/" + pkl_skimmed_dec: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdata] #list of periods + pkl_skimmed_decmerged: [ + LHC22pp/MLapplication/prod_LHC22o/skpkldecdatamerged, + ] #list of periods + mc: + prefix_dir_app: "/data2/${USER}/" + pkl_skimmed_dec: [LHC22pp_mc/MLapplication/prod_LHC24d3b/skpkldecmc] #list of periods + pkl_skimmed_decmerged: [ + LHC22pp_mc/MLapplication/prod_LHC24d3b/skpkldecmcmerged, + ] #list of periods + modelname: xgboost + modelsperptbin: [ + xgboost_classifierLcpKpi_dfselection_fPt_1.0_2.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_2.0_4.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_4.0_6.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_6.0_8.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_8.0_12.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav, + ] # sel_skim_binmin bins + probcutpresel: + data: [0.70, 0.70, 0.60, 0.60, 0.40, 0.40, 0.] # sel_skim_binmin bins + mc: [0.70, 0.70, 0.60, 0.60, 0.40, 0.40, 0.] # sel_skim_binmin bins + probcutoptimal: [0.96, 0.97, 0.90, 0.85, 0.80, 0.60, 0.] # sel_skim_binmin bins + + #region analysis + analysis: + anahptspectrum: "LctopKpi" #D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp + fd_method: "Nb" #fc, Nb + cctype: "pp" + sigmamb: 57.8e-3 #NB: multiplied by 1e12 before giving to HFPtSpectrum! + inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root + dir_general_plots: "/data2/${USER}/data/analysis_plots" + + jet_obs: &jet_default + sel_an_binmin: [3, 4, 5, 6, 7, 8, 10, 12, 16] # hadron pt bins (sel_an_binmin bins) + sel_an_binmax: [4, 5, 6, 7, 8, 10, 12, 16, 24] # hadron pt bins (sel_an_binmin bins) + bins_ptjet: [5, 7, 15, 30, 50] # systematics, TODO: split rec and gen binning + bins_ptjet_eff: [2, 5, 7, 15, 30, 50, 70] # systematics, TODO: split rec and gen binning + cand_collidx: fIndexHf3PCollBases + counter_read_data: fReadCountsWithTVXAndZVertexAndSel8 + counter_read_mc: fReadCountsWithTVXAndZVertexAndSelMC + counter_tvx: fReadCountsWithTVX + xsection_inel: 59.4 # (mb) cross-section of minimum-bias events # used # systematics + lumi_scale_mc: 408 # charm enhancement factor in MC to scale the MC luminosity + branching_ratio: 6.24e-2 # used + + observables: + zg: + bins_gen_fix: [6, -.1, .5] + bins_det_fix: [6, -.1, .5] + label: "#it{z}_{g}" + label_y: "(1/#it{N}_{jet ch}) d#it{N}/d#it{z}_{g}" + nsd: + bins_gen_fix: [10, -.5, 9.5] + bins_det_fix: [10, -.5, 9.5] + label: "#it{n}_{SD}" + label_y: "(1/#it{N}_{jet ch}) d#it{N}/d#it{n}_{SD}" + rg: + bins_gen_fix: [11, -.1, 1.] + bins_det_fix: [11, -.1, 1.] + label: "#it{R}_{g}" + label_y: "(1/#it{N}_{jet ch}) d#it{N}/d#it{R}_{g}" + zpar: + # bins_gen_fix: [10, 0., 1.] + # bins_det_fix: [10, 0., 1.] + bins_gen_var: [0.4, 0.6, 0.7, 0.8, 0.9, 1.] + bins_det_var: [0.4, 0.6, 0.7, 0.8, 0.9, 1.] + label: "#it{z}_{#parallel}" + label_y: "(1/#it{N}_{jet ch}) d#it{N}/d#it{z}_{#parallel}" + dr: + bins_gen_fix: [10, 0., 1.] + bins_det_fix: [10, 0., 1.] + label: "#Delta#it{r}" + lntheta: + bins_gen_fix: [10, 0., 5.] + bins_det_fix: [10, 0., 5.] + label: "#minusln(#it{#theta})" + arraycols: [3] + lnkt: + bins_gen_fix: [10, -8., 2.] + bins_det_fix: [10, -8., 2.] + label: "ln(#it{k}_{T}/(GeV/#it{c}))" + arraycols: [3] + lntheta-lnkt: + arraycols: [3, 4] + + mass_roofit: + - level: mc + # per_ptjet: true + components: + sig: + fn: "Gaussian::peak(m[1., 5.], mean[2.27,2.29], sigma_g1[.01,.005,.035])" + wide: + fn: 'Gaussian::wide(m, mean, expr("n*sigma_g1", n[1.,5.], sigma_g1))' + model: + fn: "SUM::sig(f_peak[0.,1.]*peak, wide)" + - ptrange: [1., 5.] + range: [2.16, 2.40] + fix_params: ["n", "f_peak"] + # per_ptjet: true + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.005,.015])' + bkg: + fn: "Polynomial::bkg(m, {a0[0.2, -3, 3], a1[-0.1, -3, 3], a2[0.1, 0.01, 3]})" + model: + fn: "SUM::sum(f_sig[0.,1.]*sig, bkg)" + - ptrange: [5., 8.] + range: [2.1, 2.48] + fix_params: ["n", "f_peak"] + # per_ptjet: true + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.03])' + bkg: + fn: "Polynomial::bkg(m, {a0[0.2, -3, 3], a1[0.2 , -3, 3], a2[0.2, -3, 3]})" + model: + fn: "SUM::sum(f_sig[0.,1.]*sig, bkg)" + - range: [2.05, 2.5] + fix_params: ["n", "f_peak"] + # per_ptjet: true + components: + # sig: + # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.03])' + bkg: + fn: "Polynomial::bkg(m, {a0[0.2, -3, 3], a1[0.2 , -3, 3], a2[0.2, -3, 3]})" + model: + fn: "SUM::sum(f_sig[0.,1.]*sig, bkg)" + + #sidesub_per_ptjet: true + sidesub: + - regions: + left: [-5.5, -3.] # systematics + signal: [-2., 2.] # systematics + right: [3., 5.5] # systematics + + mass_fit: + func_sig: "gaus" + func_bkg: "expo" + # par_start: + # par_fix: {1: 2.286} + par_constrain: { 1: [2.28, 2.29], 2: [.005, .03] } + range: [2.08, 2.48] + mass_fit_lim: [1.9, 2.62] # histogram range of the invariant mass distribution [GeV/c^2] + bin_width: 0.001 # bin width of the invariant mass histogram + n_rebin: 3 # number of mass bins to merge + efficiency: + index_match: fIndexArrayLCCMCPJETOS_hf + extra_cols: ["mlPromptScore"] + correction_method: run3 + + unfolding_iterations: 8 # used, maximum iteration + unfolding_iterations_sel: 5 # used, selected iteration # systematics + unfolding_prior_flatness: 0. # ranges from 0. (no flatness) to 1. (flat) + + fd_folding_method: 3d + fd_root: "/data2/vkucera/powheg/trees_powheg_fd_F05_R05.root" + fd_parquet: "/data2/jklein/powheg/Lc_powheg_fd_F05_R05.parquet" + + proc_type: Jets + useperiod: [1] #list of periods + usejetptbinned_deff: false + doeff_resp: true #efficiency correction for the response matrix + unmatched_gen: true + latexnamehadron: "#Lambda_{c}^{#plus}" + latexnamedecay: "pK#pi" + var_binning2: pt_jet + var_binning2_gen: pt_gen_jet + latexbin2var: "#it{p}_{T}^{jet ch}" + sel_binmin2_reco: [7.0, 15.0, 30.0] # rec jet pt bins (sel_binmin2_reco bins) + sel_binmax2_reco: [15.0, 30.0, 50.0] # rec jet pt bins (sel_binmin2_reco bins) + sel_binmin2_gen: [7.0, 15.0, 30.0] # gen jet pt bins (sel_binmin2_gen bins) + sel_binmax2_gen: [15.0, 30.0, 50.0] # gen jet pt bins (sel_binmin2_gen bins) + var_binningshape: zg_jet + var_binningshape_gen: zg_gen_jet + var_shape_latex: "#it{z}_{g}" + sel_binminshape_reco: [-0.1, 0.1, 0.2, 0.3, 0.4] + sel_binmaxshape_reco: [0.1, 0.2, 0.3, 0.4, 0.5] + sel_binminshape_gen: [-0.1, 0.1, 0.2, 0.3, 0.4] + sel_binmaxshape_gen: [0.1, 0.2, 0.3, 0.4, 0.5] + sel_closure_frac: 0.2 + triggerbit: INT7 + #jetsel_gen: "abs(y_cand) < 0.8 and abs(z_vtx_gen) < 10 and abs(eta_jet) < 0.5" + #jetsel_sim: "abs(y_cand) < 0.8 and abs(eta_jet) < 0.5" # jet selection in simulations + #jetsel_reco: "abs(y_cand) < 0.8 and abs(z_vtx_reco) < 10 and abs(eta_jet) < 0.5" + #jetsel_gen_matched_reco: "abs(eta_gen_jet) < 5.0" + jetsel_gen: "abs(y_cand) < 0.5 and abs(z_vtx_gen) < 10 and abs(eta_jet) < 0.5" + jetsel_sim: "abs(y_cand) < 0.5 and abs(eta_jet) < 0.5" # jet selection in simulations + jetsel_reco: "abs(y_cand) < 0.5 and abs(z_vtx_reco) < 10 and abs(eta_jet) < 0.5" + jetsel_gen_matched_reco: "abs(y_cand) < 0.5 and abs(z_vtx_gen) < 10 and abs(eta_gen_jet) < 0.5" + evtsel: fIsEventReject==0 + triggersel: + data: "trigger_hasbit_INT7==1" + mc: null + data: &data_out_default + runselection: [null] #FIXME + results: [ + "/home/${USER}/mlhep/lcjet/jet_obs/default/default/data/results", + ] #list of periods + resultsallp: "/home/${USER}/mlhep/lcjet/jet_obs/default/default/data/results_all" + mc: &mc_out_default + runselection: [null, null] #FIXME + results: [ + "/home/${USER}/mlhep/lcjet/jet_obs/default/default/mc/results", + ] #list of periods + resultsallp: "/home/${USER}/mlhep/lcjet/jet_obs/default/default/mc/results_all" + data_proc: # alternative processor output used as the analyzer input + <<: *data_out_default + mc_proc: # alternative processor output used as the analyzer input + <<: *mc_out_default + + # simple fitter START + sgnfunc: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # kGaus=0, k2Gaus=1, k2GausSigmaRatioPar=2 (sel_an_binmin bins) + bkgfunc: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # kExpo=0, kLin=1, kPol2=2, kNoBk=3, kPow=4, kPowEx=5 (sel_an_binmin bins) + masspeak: 2.286 + massmin: [ + 1.66, + 1.66, + 1.66, + 1.66, + 1.66, + 1.66, + 1.66, + 1.66, + 1.66, + 1.66, + 1.66, + ] # sel_an_binmin bins, fit region of the invariant mass distribution [GeV/c^2] + massmax: [ + 2.06, + 2.06, + 2.06, + 2.06, + 2.06, + 2.06, + 2.06, + 2.06, + 2.06, + 2.06, + 2.06, + ] # sel_an_binmin bins, fit region of the invariant mass distribution [GeV/c^2] + rebin: [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6] # sel_an_binmin bins + fix_mean: [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + ] # sel_an_binmin bins + masspeaksec: 2.286 + + # If SetArraySigma true: sigma_initial is taken from sigmaarray; false: sigma_initial is taken from MC + # If SetFixGaussianSigma true: sigma fixed to sigma_initial + # SetFixGaussianSigma: [false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins + SetFixGaussianSigma: [ + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + ] # sel_an_binmin bins + SetArraySigma: [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + ] # sel_an_binmin bins + sigmaarray: [ + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + ] # initial sigma (sel_an_binmin bins) + + fix_sigmasec: [true, true, true, true, true, true, true, true, true] # sel_an_binmin bins + sigmaarraysec: [ + 0.007497, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + ] # sel_an_binmin bins + use_reflections: true + # simple fitter END + + signal_sigma: 2.0 + sigma_scale: 0.9545 + sideband_sigma_1_left: 4 + sideband_sigma_2_left: 9 + sideband_sigma_1_right: 4 + sideband_sigma_2_right: 9 + sidebandleftonly: false + + niterunfolding: 15 + niterunfoldingchosen: 4 + + doprior: false + domodeldep: false + path_modeldep: /home/nzardosh/PYTHIA_Sim/PYTHIA8_Simulations/Plots/D0_Substructure_Simulations_Output.root + + powheg_path_nonprompt: /data/POWHEG/trees_powheg_fd_central.root + + powheg_path_prompt: /data/POWHEG/trees_powheg_pr_central.root + powheg_prompt_variations_path: /data/POWHEG/trees_powheg_pr_ + powheg_prompt_variations: + [ + "F1_R05", + "F05_R1", + "F2_R1", + "F1_R2", + "F2_R2", + "F05_R05", + "Mhigh", + "Mlow", + ] + + pythia8_prompt_variations_path: /data/PYTHIA8/trees_pythia8_pr_ + pythia8_prompt_variations: ["default", "charm_lo"] #["default","colour0soft"] + pythia8_prompt_variations_legend: [ + "PYTHIA 8 (Monash)", + "PYTHIA 8 charm LO", + ] # ["PYTHIA 8 (Monash)","PYTHIA 8 SoftQCD, mode 0"] + + variations_db: database_variations_LcJet_pp_jet_obs.yml + + # Additional cuts applied before mass histogram is filled + use_cuts: True + cuts: [ + "mlPromptScore > 0.97", + "mlPromptScore > 0.9", + "mlPromptScore > 0.9", + "mlPromptScore > 0.85", + "mlPromptScore > 0.85", + "mlPromptScore > 0.8", + "mlPromptScore > 0.8", + "mlPromptScore > 0.6", + "mlPromptScore > 0.6", + ] # (sel_an_binmin bins) systematics FIXME: Update for new model. + + systematics: # used in machine_learning_hep/analysis/systematics.py + probvariation: + useperiod: [0, 0, 1] #period from where to define prob cuts + ncutvar: 10 #number of looser and tighter variations + maxperccutvar: 0.25 #max diff in efficiency for loosest/tightest var + cutvarminrange: [0.80, 0.80, 0.6, 0.3, 0.3] #Min starting point for scan + cutvarmaxrange: [0.98, 0.95, 0.95, 0.95, 0.95] #Max starting point for scan + fixedmean: True #Fix mean cutvar histo to central fit + fixedsigma: True #Fix sigma cutvar histo to central fit + mcptshape: + #FONLL / generated LHC19h4c1 + weights: [1.000000] + #From SetPtWeightsFromFONLL13overLHC17c3a12 in AliPhysics + #weights: [1.429770] + weights_min_pt: 0 + weights_max_pt: 40 + weights_bins: 400 diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_Jet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_Jet_pp.yml new file mode 100644 index 0000000000..8dea33068f --- /dev/null +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_Jet_pp.yml @@ -0,0 +1,1107 @@ +--- +# © Copyright CERN 2018. All rights not expressly granted are reserved. # +# Author: Gian.Michele.Innocenti@cern.ch # +# This program is free software: you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the # +# Free Software Foundation, either version 3 of the License, or (at your # +# option) any later version. This program is distributed in the hope that # +# it will be useful, but WITHOUT ANY WARRANTY; without even the implied # +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # +# See the GNU General Public License for more details. # +# You should have received a copy of the GNU General Public License # +# along with this program. if not, see . # + +Jet_pp: + doml: true + mass: 1.86484 + sel_reco_unp: null + sel_reco_singletrac_unp: null + sel_gen_unp: null + sel_cen_unp: null + sel_good_evt_unp: null + sel_reco_skim: [null] # (sel_skim_binmin bins) FIXME: Update for new model. + sel_gen_skim: [null] # (sel_skim_binmin bins) FIXME: Update for new model. + sel_skim_binmin: [0] # skimming pt bins (sel_skim_binmin bins) FIXME: Update for new model. + sel_skim_binmax: [50] # skimming pt bins (sel_skim_binmin bins) FIXME: Update for new model. + var_binning: null + dofullevtmerge: false + + # obsolete, to be removed + # var_cand: fCandidateSelFlag + # # var_swap: fIsCandidateSwapped + bitmap_sel: + var_name: fFlagMcMatchRec + var_name_gen: fFlagMcMatchGen + var_name_origgen: fOriginMcGen + var_name_origrec: fOriginMcRec + var_isstd: isstd + var_ismcsignal: ismcsignal + var_ismcprompt: ismcprompt + var_ismcfd: ismcfd + var_ismcbkg: ismcbkg + var_ismcrefl: ismcrefl + isstd: [[1], []] + ismcsignal: [[0], []] + ismcprompt: [[0], [1]] + ismcfd: [[1], [0]] + ismcbkg: [[], [1]] + ismcrefl: [[1], [1]] # probably missing from tree creator + + #region dfs + dfs: + read: + # evtorig: + # index: fIndexCJetCO + # level: data + # trees: + # O2cjetco: [fPosZ] + # filter: "abs(fPosZ) < 10." + collcnt: + trees: + O2collcount: + [ + fReadCounts, + fReadCountsWithTVX, + fReadCountsWithTVXAndZVertexAndSel8, + fReadCountsWithTVXAndZVertexAndSelMC, + ] + # bccnt: + # trees: + # O2bccount: + # [ + # fReadCountsWithTVX, + # fReadCountsWithTVXAndNoTFB, + # fReadCountsWithTVXAndNoTFBAndNoITSROFB, + # ] + + collgen: # TODO: check if we can use the HF collision table instead + level: gen + index: fIndexCMCPJETCOS + trees: + O2cmcpjetco: [fPosZ, fCentrality, fEventSel] + filter: "abs(fPosZ) < 10." + jetgen: + level: gen + index: fIndexCMCPJETOS + trees: + O2cmcpjeto: + [ + fIndexCMCPJETCOS, + fJetPt, + fJetPhi, + fJetEta, + fJetNConstituents, + fJetR, + ] + O2cmcpjetmo: + [ + fIndexArrayCMCDJETOS_hf, + fIndexArrayCMCDJETOS_geo, + fIndexArrayCMCDJETOS_pt, + ] + O2cmcpjetsso: + [ + fEnergyMother, + fPtLeading, + fPtSubLeading, + fTheta, + fNSub2DR, + fNSub1, + fNSub2, + fAngularity, + fPairPt, + fPairEnergy, + fPairTheta, + ] + filter: "abs(fJetEta) < (.9 - (fJetR / 100.)) and fJetPt > 5." # TODO: check jet eta cut + extra: + fPt: 5. + fM: 1.86 + + colldet: + level: det + index: fIndexCMCDJETCOS + trees: + O2cmcdjetco: [fPosZ, fCentrality, fEventSel] + filter: "abs(fPosZ) < 10." + jetdet: + level: det + index: fIndexCMCDJETOS + trees: # add EEC columns + O2cmcdjeto: + [ + fIndexCMCDJETCOS, + fJetPt, + fJetPhi, + fJetEta, + fJetNConstituents, + fJetR, + ] + O2cmcdjetmo: + [ + fIndexArrayCMCPJETOS_hf, + fIndexArrayCMCPJETOS_geo, + fIndexArrayCMCPJETOS_pt, + ] + O2cmcdjetsso: + [ + fEnergyMother, + fPtLeading, + fPtSubLeading, + fTheta, + fNSub2DR, + fNSub1, + fNSub2, + fAngularity, + fPairPt, + fPairEnergy, + fPairTheta, + ] + filter: "abs(fJetEta) < (.9 - (fJetR / 100.))" # TODO: check jet eta cut + extra: + fPt: 5. + fM: 1.86 + + colldata: + level: data + index: fIndexCJETCOS + trees: + O2cjetco: [fPosZ, fCentrality, fEventSel] + filter: "abs(fPosZ) < 10." # systematics? + jetdata: + level: data + index: fIndexCJETOS + trees: + O2cjeto: + [ + fIndexCJETCOS, + fJetPt, + fJetPhi, + fJetEta, + fJetNConstituents, + fJetR, + ] + O2cjetsso: + [ + fEnergyMother, + fPtLeading, + fPtSubLeading, + fTheta, + fNSub2DR, + fNSub1, + fNSub2, + fAngularity, + fPairPt, + fPairEnergy, + fPairTheta, + ] + filter: "abs(fJetEta) < (.9 - (fJetR / 100.))" # TODO: check jet eta cut + extra: + fPt: 5. + fM: 1.86 + + merge: + # - { base: jetgen, ref: collgen } + - { base: jetdet, ref: colldet } + - { base: jetdata, ref: colldata } + + write: + jetgen: + level: gen + file: AnalysisResultsGen.parquet + jetdet: + level: det + file: AnalysisResultsReco.parquet + jetdata: + level: data + file: AnalysisResultsReco.parquet + colldata: + level: data + file: AnalysisResultsEvtOrig.parquet + collgen: + level: mc + file: AnalysisResultsEvtOrig.parquet + # colldata_all: + # level: data + # file: AnalysisResultsEvt.parquet + # collgen_all: + # level: mc + # file: AnalysisResultsEvt.parquet + collcnt: + level: all + file: AnalysisResultsCollCnt.parquet + # bccnt: + # level: all + # file: AnalysisResultsBcCnt.parquet + + variables: + var_all: + [ + fDecayLength, + fDecayLengthXY, + fDecayLengthNormalised, + fDecayLengthXYNormalised, + fCpa, + fCpaXY, + fImpactParameter0, + fImpactParameter1, + fErrorImpactParameter0, + fErrorImpactParameter1, + fNSigTpcPiExpPi, + fNSigTpcKaExpPi, + fNSigTpcPiExpKa, + fNSigTpcKaExpKa, + ] + var_training: + [ + [ + fDecayLength, + fDecayLengthXY, + fDecayLengthNormalised, + fDecayLengthXYNormalised, + fCpa, + fCpaXY, + fImpactParameter0, + fImpactParameter1, + fErrorImpactParameter0, + fErrorImpactParameter1, + fNSigTpcPiExpPi, + fNSigTpcKaExpPi, + fNSigTpcPiExpKa, + fNSigTpcKaExpKa, + ], + [ + fDecayLength, + fDecayLengthXY, + fDecayLengthNormalised, + fDecayLengthXYNormalised, + fCpa, + fCpaXY, + fImpactParameter0, + fImpactParameter1, + fErrorImpactParameter0, + fErrorImpactParameter1, + fNSigTpcPiExpPi, + fNSigTpcKaExpPi, + fNSigTpcPiExpKa, + fNSigTpcKaExpKa, + ], + [ + fDecayLength, + fDecayLengthXY, + fDecayLengthNormalised, + fDecayLengthXYNormalised, + fCpa, + fCpaXY, + fImpactParameter0, + fImpactParameter1, + fErrorImpactParameter0, + fErrorImpactParameter1, + fNSigTpcPiExpPi, + fNSigTpcKaExpPi, + fNSigTpcPiExpKa, + fNSigTpcKaExpKa, + ], + [ + fDecayLength, + fDecayLengthXY, + fDecayLengthNormalised, + fDecayLengthXYNormalised, + fCpa, + fCpaXY, + fImpactParameter0, + fImpactParameter1, + fErrorImpactParameter0, + fErrorImpactParameter1, + fNSigTpcPiExpPi, + fNSigTpcKaExpPi, + fNSigTpcPiExpKa, + fNSigTpcKaExpKa, + ], + [ + fDecayLength, + fDecayLengthXY, + fDecayLengthNormalised, + fDecayLengthXYNormalised, + fCpa, + fCpaXY, + fImpactParameter0, + fImpactParameter1, + fErrorImpactParameter0, + fErrorImpactParameter1, + fNSigTpcPiExpPi, + fNSigTpcKaExpPi, + fNSigTpcPiExpKa, + fNSigTpcKaExpKa, + ], + [ + fDecayLength, + fDecayLengthXY, + fDecayLengthNormalised, + fDecayLengthXYNormalised, + fCpa, + fCpaXY, + fImpactParameter0, + fImpactParameter1, + fErrorImpactParameter0, + fErrorImpactParameter1, + fNSigTpcPiExpPi, + fNSigTpcKaExpPi, + fNSigTpcPiExpKa, + fNSigTpcKaExpKa, + ], + [ + fDecayLength, + fDecayLengthXY, + fDecayLengthNormalised, + fDecayLengthXYNormalised, + fCpa, + fCpaXY, + fImpactParameter0, + fImpactParameter1, + fErrorImpactParameter0, + fErrorImpactParameter1, + fNSigTpcPiExpPi, + fNSigTpcKaExpPi, + fNSigTpcPiExpKa, + fNSigTpcKaExpKa, + ], + ] + #TODO: add new variables for dca, max_norm_d0d0exp + # sel_skim_binmin bins + var_boundaries: [fCosThetaStar, fPtProng] + var_correlation: + - [fCosThetaStar] # TODO: update + - [fPtProng0] + var_signal: signal + var_class: class + var_inv_mass: fM + var_y: fY + var_evt_sel: fIsEventReject + var_cuts: + - [fPtProng0, lt, null] + - [fPtProng1, lt, null] + + plot_options: + prob_cut_scan: + fPtProng0: + xlim: + - 0 + - 1 + fPtProng1: + xlim: + - 0 + - 1 + eff_cut_scan: + fPtProng0: + xlim: + - 0 + - 1 + fPtProng1: + xlim: + - 0 + - 1 + + files_names: + namefile_unmerged_tree: AO2D.root + namefile_reco: AnalysisResultsReco.parquet + namefile_evt: AnalysisResultsEvt.parquet + namefile_collcnt: AnalysisResultsCollCnt.parquet + # namefile_bccnt: AnalysisResultsBcCnt.parquet + namefile_evtvalroot: AnalysisResultsROOTEvtVal.root + namefile_evtorig: AnalysisResultsEvtOrig.parquet + namefile_gen: AnalysisResultsGen.parquet + namefile_reco_applieddata: AnalysisResultsRecoAppliedData.parquet + namefile_reco_appliedmc: AnalysisResultsRecoAppliedMC.parquet + namefile_mcweights: mcweights.root + treeoutput: "D0tree" + histofilename: "masshisto.root" + efffilename: "effhisto.root" + respfilename: "resphisto.root" + crossfilename: "cross_section_tot.root" + resultfilename: "results.root" + + #region multi + multi: + data: + nprocessesparallel: 10 + maxfiles: [-1] #list of periods + chunksizeunp: [100] #list of periods + chunksizeskim: [100] #list of periods + fracmerge: [.1] #list of periods + seedmerge: [12] #list of periods + period: [LHC22o] #list of periods + select_period: [1] + prefix_dir: /data2/MLhep/real/train_282844/ + unmerged_tree_dir: [/alice/] #list of periods + pkl: ["${USER}/jet/pkl"] #list of periods + pkl_skimmed: ["${USER}/jet/pklsk"] #list of periods + pkl_skimmed_merge_for_ml: ["${USER}/jet/pklskml"] #list of periods + pkl_skimmed_merge_for_ml_all: "${USER}/jet/pp_data_mltot" + pkl_evtcounter_all: "${USER}/jet/pp_data_evttot" + mcreweights: [../Analyses] #list of periods + mc: + nprocessesparallel: 80 + maxfiles: [-1] #list of periods + chunksizeunp: [100] #list of periods + chunksizeskim: [1000] #list of periods + fracmerge: [1.] #list of periods + seedmerge: [12] #list of periods + period: [LHC24d3b] #list of periods + select_period: [1] + prefix_dir: /data2/MLhep/sim/train_325030/ + unmerged_tree_dir: [alice/] + pkl: ["${USER}/jet/pkl"] #list of periods + pkl_skimmed: ["${USER}/jet/pklsk"] #list of periods + pkl_skimmed_merge_for_ml: ["${USER}/jet/pklskml"] #list of periods + pkl_skimmed_merge_for_ml_all: "${USER}/jet/pp_mc_prod_mltot" + pkl_evtcounter_all: "${USER}/jet/pp_mc_prod_evttot" + mcreweights: [../Analyses] #list of periods + + ml: + evtsel: null # TODO: fIsEventReject == 0 + triggersel: + data: null + mc: null + + nbkg: 500000 + nsig: 500000 + mult_bkg: [1, 1, 1, 1, 1, 1, 1] + nclasses: [20000, 20000] + sampletags: [0, 1] + equalise_sig_bkg: True + # sampletagforsignal: 1 + # sampletagforbkg: 0 + sel_ml: [fM<1.8 or fM>1.92, ismcprompt == 1] + sel_bkg: fM<1.8 or fM>1.92 + class_labels: [bkg, sig] + nkfolds: 5 + rnd_shuffle: 12 + rnd_splt: 12 + rnd_all: 12 + test_frac: 0.2 + binmin: [1, 2, 4, 6, 8, 12, 24] # must be equal to sel_skim_binmin (sel_skim_binmin bins) + binmax: [2, 4, 6, 8, 12, 24, 48] # must be equal to sel_skim_binmax (sel_skim_binmin bins) + mltype: BinaryClassification + ncorescrossval: 10 + prefix_dir_ml: /data2/jklein/MLhep/ + mlplot: mlplot + mlout: mlout + + opt: + isFONLLfromROOT: true + filename_fonll: "data/fonll/D0DplusDstarPredictions_13TeV_y05_all_300416_BDShapeCorrected.root" # file with FONLL predictions + fonll_particle: "hD0Kpipred" + fonll_pred: "max" # edge of the FONLL prediction + FF: 0.6086 # fragmentation fraction + sigma_MB: 57.8e-3 # Minimum Bias cross section (pp) 50.87e-3 [b], 1 for Pb-Pb + Taa: 1 # 23260 [b^-1] in 0-10% Pb-Pb, 3917 [b^-1] in 30-50% Pb-Pb, 1 for pp + BR: 3.95e-2 # branching ratio of the decay D0 -> K- pi+ + f_prompt: 0.9 # estimated fraction of prompt candidates + bkg_data_fraction: 0.1 # fraction of real data used in the estimation + num_steps: 111 # number of steps used in efficiency and signif. estimation + bkg_function: pol2 # fit function for bkg (among TH1 predefined fit functions, e.g. expo, pol1, pol2, ...) + save_fit: True # save bkg fits with the various cuts on ML output + raahp: [1, 1, 1, 1, 1, 1, 1] # sel_skim_binmin bins + # presel_gen_eff: "abs(fY) < 0.5 and abs(fPosZ) < 10" + presel_gen_eff: "fPt > 0." # TODO: fix + + mlapplication: + data: + prefix_dir_app: /data2/jklein/ + pkl_skimmed_dec: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdata] #list of periods + pkl_skimmed_decmerged: [ + LHC22pp/MLapplication/prod_LHC22o/skpkldecdatamerged, + ] #list of periods + mc: + prefix_dir_app: /data2/jklein/ + pkl_skimmed_dec: [LHC22pp_mc/MLapplication/prod_LHC22b1b/skpkldecmc] #list of periods + pkl_skimmed_decmerged: [ + LHC22pp_mc/MLapplication/prod_LHC22b1b/skpkldecmcmerged, + ] #list of periods + modelname: xgboost + modelsperptbin: [ + xgboost_classifierD0pp_jet_dfselection_fPt_1.0_2.0.sav, + xgboost_classifierD0pp_jet_dfselection_fPt_2.0_4.0.sav, + xgboost_classifierD0pp_jet_dfselection_fPt_4.0_6.0.sav, + xgboost_classifierD0pp_jet_dfselection_fPt_6.0_8.0.sav, + xgboost_classifierD0pp_jet_dfselection_fPt_8.0_12.0.sav, + xgboost_classifierD0pp_jet_dfselection_fPt_12.0_24.0.sav, + xgboost_classifierD0pp_jet_dfselection_fPt_24.0_48.0.sav, + ] # sel_skim_binmin bins + probcutpresel: + data: [0.75, 0.75, 0.65, 0.65, 0.45, 0.45, 0.45, .45] # sel_skim_binmin bins + mc: [0.75, 0.75, 0.65, 0.65, 0.45, 0.45, 0.45, .45] # sel_skim_binmin bins + probcutoptimal: [0.92, 0.90, 0.82, 0.80, 0.60, 0.60, 0.60, .45] # sel_skim_binmin bins + + #region analysis + analysis: + anahptspectrum: "D0Kpi" #D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp # used in analysis/analyzerdhadrons_mult.py + fd_method: "Nb" #fc, Nb + cctype: "pp" + inputfonllpred: data/fonll/D0DplusDstarPredictions_13TeV_y05_all_300416_BDShapeCorrected.root # used in machine_learning_hep/hf_pt_spectrum.py + dir_general_plots: /data2/jklein/data/analysis_plots + + jet_obs: &jet_default + hfjet: false + sel_an_binmin: [1] # hadron pt bins (sel_an_binmin bins) + sel_an_binmax: [48] # hadron pt bins (sel_an_binmin bins) # FIXME: move the last edge in sel_an_binmin + bins_ptjet: [5, 7, 15, 30, 50, 70] # systematics, TODO: split rec and gen binning + bins_ptjet_eff: [2, 5, 7, 15, 30, 50, 70, 90] # systematics, TODO: split rec and gen binning + # cand_collidx: fIndexHfD0CollBases + counter_read_data: fReadCountsWithTVXAndZVertexAndSel8 + counter_read_mc: fReadCountsWithTVXAndZVertexAndSelMC + counter_tvx: fReadCountsWithTVX + xsection_inel: 59.4 # (mb) cross-section of minimum-bias events # used # systematics + lumi_scale_mc: 408 # charm enhancement factor in MC to scale the MC luminosity + branching_ratio: 3.947e-2 # used + + observables: + zg: + bins_gen_var: + [-.1, .0, .05, .1, .15, .2, .25, .3, .35, .4, .45, .5] + bins_det_var: + [-.1, .0, .05, .1, .15, .2, .25, .3, .35, .4, .45, .5] + label: "#it{z}_{g}" + label_y: "(1/#it{N}_{jet ch}) d#it{N}/d#it{z}_{g}" + nsd: + bins_gen_fix: [7, -.5, 6.5] + bins_det_fix: [7, -.5, 6.5] + label: "#it{n}_{SD}" + label_y: "(1/#it{N}_{jet ch}) d#it{N}/d#it{n}_{SD}" + rg: + bins_gen_var: + [-.1, .0, .05, .1, .15, .2, .25, .3, .35, .4, .45, .5] + bins_det_var: + [-.1, .0, .05, .1, .15, .2, .25, .3, .35, .4, .45, .5] + label: "#it{R}_{g}" + label_y: "(1/#it{N}_{jet ch}) d#it{N}/d#it{R}_{g}" + # zpar: + # # bins_gen_fix: [10, 0., 1.] + # # bins_det_fix: [10, 0., 1.] + # bins_gen_var: + # [ + # 0.2, + # 0.25, + # 0.3, + # 0.35, + # 0.4, + # 0.45, + # 0.5, + # 0.55, + # 0.6, + # 0.65, + # 0.7, + # 0.75, + # 0.8, + # 0.85, + # 0.9, + # 0.95, + # 1., + # ] + # bins_det_var: + # [ + # 0.2, + # 0.25, + # 0.3, + # 0.35, + # 0.4, + # 0.45, + # 0.5, + # 0.55, + # 0.6, + # 0.65, + # 0.7, + # 0.75, + # 0.8, + # 0.85, + # 0.9, + # 0.95, + # 1., + # ] + # label: "#it{z}_{#parallel}" + # label_y: "(1/#it{N}_{jet ch}) d#it{N}/d#it{z}_{#parallel}" + # dr: + # bins_gen_fix: [10, 0., 1.] + # bins_det_fix: [10, 0., 1.] + # label: "#Delta#it{r}" + lntheta: + bins_gen_fix: [10, 0., 5.] + bins_det_fix: [10, 0., 5.] + label: "#minusln(#it{#theta})" + arraycols: [3] + lnkt: + bins_gen_fix: [10, -8., 2.] + bins_det_fix: [10, -8., 2.] + label: "ln(#it{k}_{T}/(GeV/#it{c}))" + arraycols: [3] + lntheta-lnkt: + arraycols: [3, 4] + + # data_selections: + # mcsig: + # level: mc + # query: "(isd0 & seld0) or (isd0bar & seld0bar)" + # mcrefl: + # level: mc + # query: "(isd0 & seld0bar) or (isd0bar & seld0)" + + corr_refl: false # systematics + fit_levels: ["mc", "data"] + mass_roofit: + - level: mcsig + datasel: mcsig + range: [1.69, 2.04] # systematics? or propagate bg fit uncertainty directly? + components: + sig: + fn: "Gaussian::peak(m[1.,5.], mean[1.85,1.89], sigma_g1[.01,.08])" + bkg: + fn: "Gaussian::wide(m, mean, sigma_wide[.05,1.])" + model: + fn: "SUM::sig(frac_wide[0.,.3]*wide, peak)" + - level: mcrefl + ptrange: [1., 3.] + datasel: mcrefl + range: [1.69, 2.04] + components: + refl_l: + fn: "Gaussian::refl_l(m, mean_l[1.7,1.865], sigma_l[.01,.1])" + refl_r: + fn: "Gaussian::refl_r(m, mean_r[1.865,2.0], sigma_r[.01,.1])" + model: + fn: "SUM::refl(frac_l[0.1,.9]*refl_l, refl_r)" + - level: mcrefl + ptrange: [3., 4.] + datasel: mcrefl + range: [1.68, 2.06] + components: + refl_l: + fn: "Gaussian::refl_l(m, mean_l[1.7,1.865], sigma_l[.01,.2])" + refl_r: + fn: "Gaussian::refl_r(m, mean_r[1.865,2.0], sigma_r[.01,.2])" + model: + fn: "SUM::refl(frac_l[0.,1.]*refl_l, refl_r)" + - level: mcrefl + ptrange: [4., 5.] + datasel: mcrefl + range: [1.64, 2.08] + components: + refl_l: + fn: "Gaussian::refl_l(m, mean_l[1.7,1.865], sigma_l[.01,.2])" + refl_r: + fn: "Gaussian::refl_r(m, mean_r[1.865,2.0], sigma_r[.01,.2])" + model: + fn: "SUM::refl(frac_l[0.,1.]*refl_l, refl_r)" + - level: mcrefl + ptrange: [5., 6.] + datasel: mcrefl + range: [1.64, 2.10] + components: + refl_l: + fn: "Gaussian::refl_l(m, mean_l[1.8,1.865], sigma_l[.01,.2])" + refl_r: + fn: "Gaussian::refl_r(m, mean_r[1.865,2.0], sigma_r[.01,.2])" + model: + fn: "SUM::refl(frac_l[0.,1.]*refl_l, refl_r)" + - level: mcrefl + ptrange: [6., 8.] + datasel: mcrefl + range: [1.60, 2.14] + components: + refl_l: + fn: "Gaussian::refl_l(m, mean_l[1.7,1.865], sigma_l[.01,.2])" + refl_r: + fn: "Gaussian::refl_r(m, mean_r[1.865,2.1], sigma_r[.01,.2])" + model: + fn: "SUM::refl(frac_l[0.,1.]*refl_l, refl_r)" + - level: mcrefl + ptrange: [8., 12.] + datasel: mcrefl + range: [1.52, 2.30] + components: + refl_l: + fn: "Gaussian::refl_l(m, mean_l[1.7,1.865], sigma_l[.01,.2])" + refl_r: + fn: "Gaussian::refl_r(m, mean_r[1.865,2.1], sigma_r[.01,.2])" + model: + fn: "SUM::refl(frac_l[0.,1.]*refl_l, refl_r)" + - level: mcrefl + ptrange: [12., 48.] + datasel: mcrefl + range: [1.40, 2.4] + components: + refl_l: + fn: "Gaussian::refl_l(m, mean_l[1.7,1.865], sigma_l[.01,.2])" + refl_r: + fn: "Gaussian::refl_r(m, mean_r[1.865,2.1], sigma_r[.01,.2])" + model: + fn: "SUM::refl(frac_l[0.,1.]*refl_l, refl_r)" + - level: mc + ptrange: [1., 3.] + range: [1.69, 2.04] + fix_params: + [ + "frac_l", + "mean_l", + "mean_r", + "sigma_l", + "sigma_r", + "frac_wide", + "sigma_g1", + "sigma_wide", + ] + components: + model: + fn: "SUM::sigrefl(frac_refl[0.,1.]*refl, sig)" + - level: mc + ptrange: [3., 4.] + range: [1.68, 2.06] + fix_params: + [ + "frac_l", + "mean_l", + "mean_r", + "sigma_l", + "sigma_r", + "frac_wide", + "sigma_g1", + "sigma_wide", + ] + components: + model: + fn: "SUM::sigrefl(frac_refl[0.,1.]*refl, sig)" + - level: mc + ptrange: [4., 5.] + range: [1.64, 2.08] + fix_params: + [ + "frac_l", + "mean_l", + "mean_r", + "sigma_l", + "sigma_r", + "frac_wide", + "sigma_g1", + "sigma_wide", + ] + components: + model: + fn: "SUM::sigrefl(frac_refl[0.,1.]*refl, sig)" + - level: mc + ptrange: [5., 6.] + range: [1.64, 2.10] + fix_params: + [ + "frac_l", + "mean_l", + "mean_r", + "sigma_l", + "sigma_r", + "frac_wide", + "sigma_g1", + "sigma_wide", + ] + components: + model: + fn: "SUM::sigrefl(frac_refl[0.,1.]*refl, sig)" + - level: mc + ptrange: [6., 8.] + range: [1.60, 2.14] + fix_params: + [ + "frac_l", + "mean_l", + "mean_r", + "sigma_l", + "sigma_r", + "frac_wide", + "sigma_g1", + "sigma_wide", + ] + components: + model: + fn: "SUM::sigrefl(frac_refl[0.,1.]*refl, sig)" + - level: mc + ptrange: [8., 12.] + range: [1.52, 2.30] + fix_params: + [ + "frac_l", + "mean_l", + "mean_r", + "sigma_l", + "sigma_r", + "frac_wide", + "sigma_g1", + "sigma_wide", + ] + components: + model: + fn: "SUM::sigrefl(frac_refl[0.,1.]*refl, sig)" + - level: mc + ptrange: [12., 48.] + range: [1.40, 2.40] + fix_params: + [ + "frac_l", + "mean_l", + "mean_r", + "sigma_l", + "sigma_r", + "frac_wide", + "sigma_g1", + "sigma_wide", + ] + components: + model: + fn: "SUM::sigrefl(frac_refl[0.,1.]*refl, sig)" + - ptrange: [1., 2.] + per_ptjet: true + range: [1.72, 2.02] + fix_params: ["frac_refl"] + free_params: ["sigma_g1"] + fix_params_ptjet: ["mean", "sigma_g1"] + components: + bkg: + fn: "Exponential::bkg(m, alpha[-100,0])" + model: + fn: "SUM::sum(frac[0.,1.]*sigrefl, bkg)" + - ptrange: [2., 3.] + per_ptjet: true + fix_params: ["frac_refl"] + free_params: ["sigma_g1"] + fix_params_ptjet: ["mean", "sigma_g1"] + range: [1.72, 2.04] + components: + bkg: + fn: "Exponential::bkg(m, alpha[-100,0])" + model: + fn: "SUM::sum(frac[0.,1.]*sigrefl, bkg)" + - ptrange: [3., 4.] + per_ptjet: true + fix_params: ["frac_refl"] + free_params: ["sigma_g1"] + fix_params_ptjet: ["mean", "sigma_g1"] + range: [1.72, 2.06] + components: + bkg: + fn: "Exponential::bkg(m, alpha[-100,0])" + model: + fn: "SUM::sum(frac[0.,1.]*sigrefl, bkg)" + - ptrange: [4., 5.] + per_ptjet: true + fix_params: ["frac_refl"] + free_params: ["sigma_g1"] + fix_params_ptjet: ["mean", "sigma_g1"] + # fix_params: ['mean', 'sigma_g1', 'frac_refl'] + range: [1.72, 2.08] + components: + bkg: + fn: "Exponential::bkg(m, alpha[-100,0])" + model: + fn: "SUM::sum(frac[0.,1.]*sigrefl, bkg)" + - ptrange: [5., 6.] + per_ptjet: true + fix_params: ["frac_refl"] + free_params: ["sigma_g1"] + fix_params_ptjet: ["mean", "sigma_g1"] + range: [1.72, 2.10] + components: + bkg: + fn: "Exponential::bkg(m, alpha[-100,0])" + model: + fn: "SUM::sum(frac[0.,1.]*sigrefl, bkg)" + - ptrange: [6., 8.] + per_ptjet: true + fix_params: ["frac_refl"] + free_params: ["sigma_g1"] + fix_params_ptjet: ["mean", "sigma_g1"] + range: [1.72, 2.14] + components: + bkg: + fn: "Exponential::bkg(m, alpha[-100,0])" + model: + fn: "SUM::sum(frac[0.,1.]*sigrefl, bkg)" + - range: [1.72, 2.20] + per_ptjet: true + fix_params: ["frac_refl"] + free_params: ["sigma_g1"] + fix_params_ptjet: ["mean", "sigma_g1"] + components: + bkg: + fn: "Exponential::bkg(m, alpha[-100,0])" + model: + fn: "SUM::sum(frac[0.,1.]*sigrefl, bkg)" + + sidesub_per_ptjet: true + sidesub: + - level: mc + regions: + left: [-2., -2.] + signal: [-2., 2.] + right: [3., 3.] + - ptrange: [16., 100.] + regions: + left: [-2., -2.] + signal: [-2., 2.] # systematics + right: [3., 5.] # systematics + - regions: + left: [-5.5, -3.] # systematics + signal: [-2., 2.] # systematics + right: [3., 5.5] # systematics + + # mass_fit: + # func_sig: 'gaus' + # func_bkg: 'expo' + # # par_start: {1: 1.86} + # # par_fix: {1: 1.86} + # par_constrain: {1: [1.85, 1.87], 2: [.01, .08]} + # range: [1.695, 2.06] + mass_fit_lim: [1.5, 2.22] # histogram range of the invariant mass distribution [GeV/c^2]; used in processer for binarray_mass (not used for fit range!) + bin_width: 0.001 # bin width of the invariant mass histogram # systematics? + n_rebin: 3 # number of mass bins to merge + efficiency: + # extra_cols: ["isd0", "isd0bar", "seld0", "seld0bar"] + # filter_det: "(isd0 & seld0) or (isd0bar & seld0bar)" + index_match: fIndexArrayCMCPJETOS_geo + correction_method: run3 + + unfolding_iterations: 8 # used, maximum iteration + unfolding_iterations_sel: 5 # used, selected iteration # systematics + unfolding_prior_flatness: 0. # ranges from 0. (no flatness) to 1. (flat) + + closure: + pure_signal: false + # exclude_feeddown_det: 'ismcsignal==1 & ismcprompt==1' + # exclude_feeddown_gen: 'ismcsignal_gen==1 & ismcprompt_gen==1' + # filter_reflections: '(isd0 & seld0) or (isd0bar & seld0bar)' + use_matched: true + + frac_mcana: .2 # fraction of MC sample for the closure + fd_root: "/data2/vkucera/powheg/trees_powheg_fd_central.root" # systematics + fd_parquet: "/data2/jklein/powheg/trees_powheg_fd_central.parquet" # systematics + + # obsolete? + proc_type: Jets # used + useperiod: [1] #list of periods # used + # usejetptbinned_deff: false + # doeff_resp: true #efficiency correction for the response matrix + # unmatched_gen: true + latexnamehadron: "D^{0}" + # latexnamedecay: "K^{#minus} #pi^{#plus}" + var_binning2: pt_jet + # var_binning2_gen: pt_gen_jet + latexbin2var: "#it{p}_{T}^{jet ch}" + # sel_binmin2_reco: [5, 7, 15, 30] # rec jet pt bins (sel_binmin2_reco bins) + # sel_binmax2_reco: [7, 15, 30, 50] # rec jet pt bins (sel_binmin2_reco bins) + # sel_binmin2_gen: [5, 7, 15, 30] # rec jet pt bins (sel_binmin2_reco bins) + # sel_binmax2_gen: [7, 15, 30, 50] # rec jet pt bins (sel_binmin2_reco bins) + # var_binningshape: zg_jet + # var_binningshape_gen: zg_gen_jet + # var_shape_latex: "shape" + # sel_binminshape_reco: [-0.1,0.1,0.2,0.3,0.4] + # sel_binmaxshape_reco: [0.1,0.2,0.3,0.4,0.5] + # sel_binminshape_gen: [-0.1,0.1,0.2,0.3,0.4] + # sel_binmaxshape_gen: [0.1,0.2,0.3,0.4,0.5] + # sel_closure_frac: 0.2 + # triggerbit: INT7 + #jetsel_gen: "abs(y_cand) < 0.8 and abs(z_vtx_gen) < 10 and abs(eta_jet) < 0.5" + #jetsel_sim: "abs(y_cand) < 0.8 and abs(eta_jet) < 0.5" # jet selection in simulations + #jetsel_reco: "abs(y_cand) < 0.8 and abs(z_vtx_reco) < 10 and abs(eta_jet) < 0.5" + #jetsel_gen_matched_reco: "abs(eta_gen_jet) < 5.0" + # jetsel_gen: "abs(y_cand) < 0.5 and abs(z_vtx_gen) < 10 and abs(eta_jet) < 0.5" + # jetsel_sim: "abs(y_cand) < 0.5 and abs(eta_jet) < 0.5" # jet selection in simulations + # jetsel_reco: "abs(y_cand) < 0.5 and abs(z_vtx_reco) < 10 and abs(eta_jet) < 0.5" + # jetsel_gen_matched_reco: "abs(y_cand) < 0.5 and abs(z_vtx_gen) < 10 and abs(eta_gen_jet) < 0.5" + evtsel: null # fIsEventReject==0 + triggersel: + data: "trigger_hasbit_INT7==1" + mc: null + data: &data_out_default + runselection: [null] #FIXME # used but useless + results: [ + "/home/${USER}/mlhep/jet/jet_obs/default/default/data/results", + ] #list of periods + resultsallp: "/home/${USER}/mlhep/jet/jet_obs/default/default/data/results_all" + mc: &mc_out_default + runselection: [null] #FIXME # used but useless + results: [ + "/home/${USER}/mlhep/jet/jet_obs/default/default/mc/results", + ] #list of periods + resultsallp: "/home/${USER}/mlhep/jet/jet_obs/default/default/mc/results_all" + data_proc: # alternative processor output used as the analyzer input + <<: *data_out_default + mc_proc: # alternative processor output used as the analyzer input + <<: *mc_out_default + + # simple fitter START # used in cplusutilities/mass_fitter.C + # sgnfunc: [0,0,0,0,0,0,0,0,0,0,0,0] # kGaus=0, k2Gaus=1, k2GausSigmaRatioPar=2 (sel_an_binmin bins) + # bkgfunc: [0,0,0,0,0,0,0,0,0,0,0,0] # kExpo=0, kLin=1, kPol2=2, kNoBk=3, kPow=4, kPowEx=5 (sel_an_binmin bins) + # masspeak: 1.864 + # massmin: [1.66,1.66,1.66,1.66,1.66,1.66,1.66,1.66,1.66,1.66,1.66,1.66] # sel_an_binmin bins, fit region of the invariant mass distribution [GeV/c^2] + # massmax: [2.06,2.06,2.06,2.06,2.06,2.06,2.06,2.06,2.06,2.06,2.06,2.06] # sel_an_binmin bins, fit region of the invariant mass distribution [GeV/c^2] + # rebin: [6,6,6,6,6,6,6,6,6,6,6,6] # sel_an_binmin bins + # fix_mean: [false, false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins + # masspeaksec: 1.864 + + # obsolete (uses Ali... fitter) + # If SetArraySigma true: sigma_initial is taken from sigmaarray; false: sigma_initial is taken from MC + # If SetFixGaussianSigma true: sigma fixed to sigma_initial + # SetFixGaussianSigma: [false, false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins + # SetFixGaussianSigma: [true, true, true, true, true, true, true, true, true, true, true, true] # sel_an_binmin bins + # SetArraySigma: [false, false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins + # sigmaarray: [0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01] # initial sigma (sel_an_binmin bins) + + # fix_sigmasec: [true, true, true, true, true, true, true, true, true, true, true, true] # sel_an_binmin bins + # sigmaarraysec: [0.007497,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01] # sel_an_binmin bins + # use_reflections: true + # simple fitter END + + # signal_sigma: 2.0 + # sigma_scale: 0.9545 + # sideband_sigma_1_left: 4 + # sideband_sigma_2_left: 9 + # sideband_sigma_1_right: 4 + # sideband_sigma_2_right: 9 + # sidebandleftonly: false + + # niterunfolding: 15 + # niterunfoldingchosen: 4 + + # doprior: false + # domodeldep: false + # path_modeldep: /home/nzardosh/PYTHIA_Sim/PYTHIA8_Simulations/Plots/D0_Substructure_Simulations_Output.root + + # replace with fd_root... + # powheg_path_nonprompt: /data/POWHEG/trees_powheg_fd_central.root + # powheg_path_prompt: /data/POWHEG/trees_powheg_pr_central.root + # powheg_prompt_variations_path: /data/POWHEG/trees_powheg_pr_ + # powheg_prompt_variations: ["F1_R05","F05_R1","F2_R1","F1_R2","F2_R2","F05_R05","Mhigh","Mlow"] + + # pythia8_prompt_variations_path: /data/PYTHIA8/trees_pythia8_pr_ + # pythia8_prompt_variations: ["default", "charm_lo"] #["default","colour0soft"] + # pythia8_prompt_variations_legend: ["PYTHIA 8 (Monash)", "PYTHIA 8 charm LO"] # ["PYTHIA 8 (Monash)","PYTHIA 8 SoftQCD, mode 0"] + + variations_db: database_variations_Jet_pp_jet_obs.yml + + # Additional cuts applied before mass histogram is filled + use_cuts: False # systematics + cuts: [null] + + systematics: # used in machine_learning_hep/analysis/systematics.py + probvariation: + useperiod: [0, 0, 1] #period from where to define prob cuts + ncutvar: 10 #number of looser and tighter variations + maxperccutvar: 0.25 #max diff in efficiency for loosest/tightest var + cutvarminrange: [0.80, 0.80, 0.6, 0.3, 0.3] #Min starting point for scan + cutvarmaxrange: [0.98, 0.95, 0.95, 0.95, 0.95] #Max starting point for scan + fixedmean: True #Fix mean cutvar histo to central fit + fixedsigma: True #Fix sigma cutvar histo to central fit + mcptshape: + #FONLL / generated LHC19h4c1 + weights: [1.000000] + #From SetPtWeightsFromFONLL13overLHC17c3a12 in AliPhysics + #weights: [1.429770] + weights_min_pt: 0 + weights_max_pt: 40 + weights_bins: 400 diff --git a/machine_learning_hep/processer.py b/machine_learning_hep/processer.py index 37f71ad8fd..0b8f9a8e17 100644 --- a/machine_learning_hep/processer.py +++ b/machine_learning_hep/processer.py @@ -111,8 +111,8 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab self.n_reco = datap["files_names"]["namefile_reco"] self.n_evt = datap["files_names"]["namefile_evt"] self.n_collcnt = datap["files_names"]["namefile_collcnt"] - self.n_bccnt = datap["files_names"]["namefile_bccnt"] - self.n_evtorig = datap["files_names"]["namefile_evtorig"] + self.n_bccnt = datap["files_names"].get("namefile_bccnt") + self.n_evtorig = datap["files_names"].get("namefile_evtorig") self.n_evt_count_ml = datap["files_names"].get("namefile_evt_count", "evtcount.yaml") self.n_gen = datap["files_names"]["namefile_gen"] self.n_filemass = datap["files_names"]["histofilename"] @@ -377,6 +377,7 @@ def dfuse(df_spec): if 'extra' in df_spec: self.logger.debug(' %s -> extra', df_name) for col_name, col_val in df_spec['extra'].items(): + self.logger.debug(' %s -> %s', col_name, col_val) dfs[df_name][col_name] = dfs[df_name].eval(col_val) if 'extract_component' in df_spec: self.logger.debug(' %s -> extract_component', df_name) diff --git a/machine_learning_hep/processer_jet.py b/machine_learning_hep/processer_jet.py index f50422e15a..2e32a520f6 100644 --- a/machine_learning_hep/processer_jet.py +++ b/machine_learning_hep/processer_jet.py @@ -123,20 +123,14 @@ def _verify_variables(self, dfi): def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name self.logger.info('calculating variables') if len(df) == 0: + df['nsub21'] = None + df['zg'] = None + df['rg'] = None + df['nsd'] = None + df['lnkt'] = None + df['lntheta'] = None return df - df['dr'] = np.sqrt((df.fJetEta - df.fEta)**2 + ((df.fJetPhi - df.fPhi + math.pi) % math.tau - math.pi)**2) - df['jetPx'] = df.fJetPt * np.cos(df.fJetPhi) - df['jetPy'] = df.fJetPt * np.sin(df.fJetPhi) - df['jetPz'] = df.fJetPt * np.sinh(df.fJetEta) - df['hfPx'] = df.fPt * np.cos(df.fPhi) - df['hfPy'] = df.fPt * np.sin(df.fPhi) - df['hfPz'] = df.fPt * np.sinh(df.fEta) - df['zpar_num'] = df.jetPx * df.hfPx + df.jetPy * df.hfPy + df.jetPz * df.hfPz - df['zpar_den'] = df.jetPx * df.jetPx + df.jetPy * df.jetPy + df.jetPz * df.jetPz - df['zpar'] = df.zpar_num / df.zpar_den - df[df['zpar'] >= 1.]['zpar'] = .999 # move 1 to last bin df['nsub21'] = df.fNSub2 / df.fNSub1 - self.logger.debug('zg') df['zg_array'] = np.array(.5 - abs(df.fPtSubLeading / (df.fPtLeading + df.fPtSubLeading) - .5)) zcut = self.cfg('zcut', .1) @@ -150,6 +144,20 @@ def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name (lambda ar: np.log(ar.fPtSubLeading * np.sin(ar.fTheta))), axis=1) df['lntheta'] = df['fTheta'].apply(lambda x: -np.log(x)) # df['lntheta'] = np.array(-np.log(df.fTheta)) + + if self.cfg('hfjet', True): + df['dr'] = np.sqrt((df.fJetEta - df.fEta)**2 + ((df.fJetPhi - df.fPhi + math.pi) % math.tau - math.pi)**2) + df['jetPx'] = df.fJetPt * np.cos(df.fJetPhi) + df['jetPy'] = df.fJetPt * np.sin(df.fJetPhi) + df['jetPz'] = df.fJetPt * np.sinh(df.fJetEta) + df['hfPx'] = df.fPt * np.cos(df.fPhi) + df['hfPy'] = df.fPt * np.sin(df.fPhi) + df['hfPz'] = df.fPt * np.sinh(df.fEta) + df['zpar_num'] = df.jetPx * df.hfPx + df.jetPy * df.hfPy + df.jetPz * df.hfPz + df['zpar_den'] = df.jetPx * df.jetPx + df.jetPy * df.jetPy + df.jetPz * df.jetPz + df['zpar'] = df.zpar_num / df.zpar_den + df[df['zpar'] >= 1.]['zpar'] = .999 # move 1 to last bin + self.logger.debug('done') if verify: self._verify_variables(df) @@ -165,6 +173,7 @@ def split_df(self, dfi, frac): # region histomass + # pylint: disable=too-many-branches def process_histomass_single(self, index): self.logger.info('Processing (histomass) %s', self.l_evtorig[index]) @@ -172,18 +181,20 @@ def process_histomass_single(self, index): dfevtorig = read_df(self.l_evtorig[index]) histonorm = TH1F("histonorm", "histonorm", 4, 0, 4) histonorm.SetBinContent(1, len(dfquery(dfevtorig, self.s_evtsel))) - dfcollcnt = read_df(self.l_collcnt[index]) - ser_collcnt = dfcollcnt[self.cfg(f'counter_read_{self.mcordata}')] - collcnt_read = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_collcnt)) - ser_collcnt = dfcollcnt[self.cfg('counter_tvx')] - collcnt_tvx = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_collcnt)) - dfbccnt = read_df(self.l_bccnt[index]) - ser_bccnt = dfbccnt[self.cfg('counter_tvx')] - bccnt_tvx = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_bccnt)) - self.logger.info('sampled %g collisions', collcnt_read) - histonorm.SetBinContent(2, collcnt_read) - histonorm.SetBinContent(3, collcnt_tvx) - histonorm.SetBinContent(4, bccnt_tvx) + if self.l_collcnt: + dfcollcnt = read_df(self.l_collcnt[index]) + ser_collcnt = dfcollcnt[self.cfg(f'counter_read_{self.mcordata}')] + collcnt_read = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_collcnt)) + self.logger.info('sampled %g collisions', collcnt_read) + histonorm.SetBinContent(2, collcnt_read) + ser_collcnt = dfcollcnt[self.cfg('counter_tvx')] + collcnt_tvx = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_collcnt)) + histonorm.SetBinContent(3, collcnt_tvx) + if self.l_bccnt: + dfbccnt = read_df(self.l_bccnt[index]) + ser_bccnt = dfbccnt[self.cfg('counter_tvx')] + bccnt_tvx = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_bccnt)) + histonorm.SetBinContent(4, bccnt_tvx) get_axis(histonorm, 0).SetBinLabel(1, 'N_{evt}') get_axis(histonorm, 0).SetBinLabel(2, 'N_{coll}') get_axis(histonorm, 0).SetBinLabel(3, 'N_{coll}^{TVX}') @@ -315,21 +326,26 @@ def process_efficiency_single(self, index): with TFile.Open(self.l_histoeff[index], "recreate") as rfile: # TODO: avoid hard-coding values here (check if restriction is needed at all) - cols = ['ismcprompt', 'ismcsignal', 'ismcfd', 'fPt', 'fEta', 'fPhi', 'fJetPt', 'fJetEta', 'fJetPhi', - 'fPtLeading', 'fPtSubLeading', 'fTheta', 'fNSub2DR', 'fNSub1', 'fNSub2'] + cols = ['ismcprompt', 'ismcsignal', 'ismcfd', + 'fPt', 'fEta', 'fPhi', 'fJetPt', 'fJetEta', 'fJetPhi', 'fPtLeading', 'fPtSubLeading', 'fTheta', + 'fNSub2DR', 'fNSub1', 'fNSub2'] if self.cfg('hfjet', True) else None # read generator level dfgen_orig = pd.concat(read_df(self.mptfiles_gensk[bin][index], columns=cols) for bin in self.active_bins_skim) df = self._calculate_variables(dfgen_orig) df = df.rename(lambda name: name + '_gen', axis=1) - dfgen = {'pr': df.loc[(df.ismcsignal_gen == 1) & (df.ismcprompt_gen == 1)], - 'np': df.loc[(df.ismcsignal_gen == 1) & (df.ismcfd_gen == 1)]} + if self.cfg('hfjet', True): + dfgen = {'pr': df.loc[(df.ismcsignal_gen == 1) & (df.ismcprompt_gen == 1)], + 'np': df.loc[(df.ismcsignal_gen == 1) & (df.ismcfd_gen == 1)]} + else: + dfgen = {'pr': df, 'np': df} # read detector level - cols.extend(self.cfg('efficiency.extra_cols', [])) - if idx := self.cfg('efficiency.index_match'): - cols.append(idx) + if cols: + cols.extend(self.cfg('efficiency.extra_cols', [])) + if idx := self.cfg('efficiency.index_match'): + cols.append(idx) df = pd.concat(read_df(self.mptfiles_recosk[bin][index], columns=cols) for bin in self.active_bins_skim) @@ -342,8 +358,11 @@ def process_efficiency_single(self, index): else: self.logger.warning('No matching criterion specified, cannot match det and gen') df = self._calculate_variables(df) - dfdet = {'pr': df.loc[(df.ismcsignal == 1) & (df.ismcprompt == 1)], - 'np': df.loc[(df.ismcsignal == 1) & (df.ismcfd == 1)]} + if self.cfg('hfjet', True): + dfdet = {'pr': df.loc[(df.ismcsignal == 1) & (df.ismcprompt == 1)], + 'np': df.loc[(df.ismcsignal == 1) & (df.ismcfd == 1)]} + else: + dfdet = {'pr': df, 'np': df} dfmatch = {cat: pd.merge(dfdet[cat], dfgen[cat], left_on=['df', 'idx_match'], right_index=True) for cat in cats if 'idx_match' in dfdet[cat]} diff --git a/machine_learning_hep/utilities.py b/machine_learning_hep/utilities.py index 14cb170c47..5414321a51 100644 --- a/machine_learning_hep/utilities.py +++ b/machine_learning_hep/utilities.py @@ -207,8 +207,7 @@ def seldf_singlevar(dataframe, var, minval, maxval): """ Make projection on variable using [X,Y), e.g. pT or multiplicity """ - return dataframe.loc[(dataframe[var] >= minval) & (dataframe[var] < maxval)] - + return dataframe.loc[(dataframe[var] >= minval) & (dataframe[var] < maxval)] if var is not None else dataframe def seldf_singlevar_inclusive(dataframe, var, minval, maxval): """ diff --git a/machine_learning_hep/utilities_files.py b/machine_learning_hep/utilities_files.py index 606660f032..a852f47ac8 100644 --- a/machine_learning_hep/utilities_files.py +++ b/machine_learning_hep/utilities_files.py @@ -131,6 +131,8 @@ def createlist(prefolder: str, mylistfolder: list[str], namefile: str): """ Appends base foldername + filename in list """ + if not namefile: + return [] listfiles = appendfiletolist(mylistfolder, namefile) listfiles = appendmainfoldertolist(prefolder, listfiles) return listfiles diff --git a/run_hfjets.py b/run_hfjets.py index d99c306769..10e13a3f1d 100755 --- a/run_hfjets.py +++ b/run_hfjets.py @@ -24,19 +24,24 @@ # parser.add_argument('--dryrun', '-n', action='store_true') args = parser.parse_args() -if args.case == 'd0jet': - DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml' - # DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_rebin_0.yml' - # DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_rebin_1.yml' - # DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_rebin_2.yml' - # DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_bkgfunc.yml' -elif args.case == 'd0jetr2': - DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet_run2cmp.yml' -elif args.case == 'lcjet': - DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml' -else: - print(f'Unknown case <{args.case}>') - sys.exit(-1) +match args.case: + case 'jet': + DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_Jet_pp.yml' + case 'd0jet': + DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml' + # DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_rebin_0.yml' + # DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_rebin_1.yml' + # DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_rebin_2.yml' + # DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp_fitting_bkgfunc.yml' + case 'd0jetr2': + DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet_run2cmp.yml' + case 'lcjet': + DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml' + case 'jpsijet': + DB = 'machine_learning_hep/data/data_run3/database_ml_parameters_JPsiJet_pp.yml' + case _: + print(f'Unknown case <{args.case}>') + sys.exit(-1) for step in args.steps: subprocess.run(f'mlhep -r machine_learning_hep/submission/{step}.yml ' +