To prepare the vertexing jobs and jobs for producing the n-tuples needed for the BDT training, one starts by making a list of the reconstruction output files made from the vvZ (350 GeV) stdhep files:
ls -1 /nfs/slac/g/lcd/ilc_data4/snowmass/ILC350/flavorTraining/sidloi3/slcio/reco/*.slcio > ilc350flav.lst
The following submits the jobs to the batch queue:
source subdstlst-tup-from-rec-job ilc350flav.lst
...
Code Block |
---|
makentuple-all-batch.xml:
<marlin>
  <!-- Marlin steering for the n-tuple jobs: two LcfiplusProcessor instances,
       listed in <execute> as JetClustering followed by MakeNtuple. -->
  <execute>
    <processor name="JetClustering"/>
    <processor name="MakeNtuple"/>
  </execute>

  <global>
    <!-- NOTE(review): input-dst.slcio looks like a per-job placeholder name; confirm against the submit script -->
    <parameter name="LCIOInputFiles">
      input-dst.slcio
    </parameter>
    <parameter name="GearXMLFile">/u/ey/homer/sidhome/lcfi/steering/clic_sid.gear</parameter>
    <parameter name="MaxRecordNumber" value="-1" />
    <parameter name="SkipNEvents" value="0" />
    <parameter name="SupressCheck" value="false" />
    <parameter name="Verbosity" options="DEBUG0-4,MESSAGE0-4,WARNING0-4,ERROR0-4,SILENT">WARNING</parameter>
  </global>

  <processor name="JetClustering" type="LcfiplusProcessor">
    <!-- algorithms run by this processor: jet clustering, then jet vertex refinement -->
    <parameter name="Algorithms" type="stringVec"> JetClustering JetVertexRefiner</parameter>

    <!-- settings shared by the algorithms -->
    <parameter name="PFOCollection" type="string" value="PandoraPFOCollection" /> <!-- PFO collection read as input -->
    <parameter name="UseMCP" type="int" value="0" /> <!-- MC information is not used -->
    <parameter name="MCPCollection" type="string" value="" /> <!-- unused -->
    <parameter name="MCPFORelation" type="string" value="" /> <!-- unused -->
    <parameter name="ReadSubdetectorEnergies" type="int" value="0"/> <!-- would be true for ILD -->
    <parameter name="UpdateVertexRPDaughters" type="int" value="0"/> <!-- false for non-updative PandoraPFOs -->
    <parameter name="MagneticField" type="float" value="5"/>

    <!-- JetClustering settings -->
    <parameter name="JetClustering.InputVertexCollectionName" type="string" value="BuildUpVertex" /> <!-- vertex collection used by the jet clustering -->
    <parameter name="JetClustering.OutputJetCollectionName" type="stringVec" value="VertexJets" /> <!-- output collection name; several may be given -->
    <parameter name="JetClustering.NJetsRequested" type="intVec" value="2" /> <!-- several NJets values may be given -->
    <parameter name="JetClustering.YCut" type="doubleVec" value="0." /> <!-- 0 means not used -->
    <parameter name="JetClustering.UseMuonID" type="int" value="1" /> <!-- jet-muon ID for jet clustering -->
    <parameter name="JetClustering.VertexSelectionMinimumDistance" type="double" value="0.3" /> <!-- mm -->
    <parameter name="JetClustering.VertexSelectionMaximumDistance" type="double" value="30." /> <!-- mm -->
    <parameter name="JetClustering.VertexSelectionK0MassWidth" type="double" value="0.02" /> <!-- GeV -->
    <parameter name="JetClustering.YAddedForJetVertexVertex" type="double" value="100"/> <!-- penalty for combining two vertices -->
    <parameter name="JetClustering.YAddedForJetLeptonVertex" type="double" value="100"/> <!-- penalty for combining a lepton with a vertex -->
    <parameter name="JetClustering.YAddedForJetLeptonLepton" type="double" value="100"/> <!-- penalty for combining two leptons -->

    <!-- JetVertexRefiner settings: reads VertexJets, writes RefinedJets and RefinedVertex -->
    <parameter name="JetVertexRefiner.InputJetCollectionName" type="string" value="VertexJets" />
    <parameter name="JetVertexRefiner.OutputJetCollectionName" type="string" value="RefinedJets" />
    <parameter name="JetVertexRefiner.PrimaryVertexCollectionName" type="string" value="PrimaryVertex" />
    <parameter name="JetVertexRefiner.InputVertexCollectionName" type="string" value="BuildUpVertex" />
    <parameter name="JetVertexRefiner.V0VertexCollectionName" type="string" value="BuildUpVertex_V0" />
    <parameter name="JetVertexRefiner.OutputVertexCollectionName" type="string" value="RefinedVertex" />
    <parameter name="JetVertexRefiner.MinPosSingle" type="double" value="0.3" />
    <parameter name="JetVertexRefiner.MaxPosSingle" type="double" value="30." />
    <parameter name="JetVertexRefiner.MinEnergySingle" type="double" value="1." />
    <parameter name="JetVertexRefiner.MaxAngleSingle" type="double" value="0.5" />
    <parameter name="JetVertexRefiner.MaxSeparationPerPosSingle" type="double" value="0.1" />
    <parameter name="JetVertexRefiner.mind0sigSingle" type="double" value="5." />
    <parameter name="JetVertexRefiner.minz0sigSingle" type="double" value="5." />
    <parameter name="JetVertexRefiner.OneVertexProbThreshold" type="double" value="0.001" />
    <parameter name="JetVertexRefiner.MaxCharmFlightLengthPerJetEnergy" type="double" value="0.1" />
  </processor>

  <processor name="MakeNtuple" type="LcfiplusProcessor">
    <!-- algorithms run by this processor: FlavorTag, then MakeNtuple -->
    <parameter name="Algorithms" type="stringVec">FlavorTag MakeNtuple</parameter>

    <!-- settings shared by the algorithms -->
    <!-- NOTE(review): no MagneticField parameter is set here, unlike the JetClustering
         processor (value 5); confirm the processor default is what is intended -->
    <parameter name="PFOCollection" type="string" value="PandoraPFOCollection" /> <!-- PFO collection read as input -->
    <parameter name="UseMCP" type="int" value="0" /> <!-- MC information is not used -->
    <parameter name="MCPCollection" type="string" value="" /> <!-- unused -->
    <parameter name="MCPFORelation" type="string" value="" /> <!-- unused -->
    <parameter name="ReadSubdetectorEnergies" type="int" value="0"/> <!-- would be true for ILD -->
    <parameter name="UpdateVertexRPDaughters" type="int" value="0"/> <!-- false for non-updative PandoraPFOs -->
    <parameter name="PrimaryVertexCollectionName" type="string" value="PrimaryVertex" />

    <!-- FlavorTag reads the RefinedJets collection written by the JetClustering processor above -->
    <parameter name="FlavorTag.JetCollectionName" type="string" value="RefinedJets" />
    <parameter name="MakeNtuple.AuxiliaryInfo" type="int" value="-1" />
    <parameter name="FlavorTag.D0ProbFileName" type="string" value="/u/ey/homer/sidhome/lcfi/steering/d0prob_zpole.root"/>
    <parameter name="FlavorTag.Z0ProbFileName" type="string" value="/u/ey/homer/sidhome/lcfi/steering/z0prob_zpole.root"/>

    <!-- ROOT file the n-tuple is written to -->
    <parameter name="MakeNtuple.OutputRootFileName" type="string" value="output-ntpl.root" />
  </processor>
</marlin>
|
To do the training:
Marlin training-350.xml
where:
Code Block |
---|
training-350.xml:
<marlin>
  <!-- Marlin steering for the BDT training: a single LcfiplusProcessor running TrainMVA. -->
  <execute>
    <processor name="MyLcfiplusProcessor"/>
  </execute>

  <global>
    <!-- no LCIO input is listed; the training reads the ROOT n-tuples named by TrainMVA.InputRootFile* below -->
    <parameter name="LCIOInputFiles"> </parameter>
    <parameter name="MaxRecordNumber" value="-1" />
    <parameter name="SkipNEvents" value="0" />
    <parameter name="SupressCheck" value="false" />
    <parameter name="GearXMLFile">clic_sid.gear</parameter>
    <parameter name="Verbosity" options="DEBUG0-4,MESSAGE0-4,WARNING0-4,ERROR0-4,SILENT">WARNING</parameter>
  </global>

  <processor name="MyLcfiplusProcessor" type="LcfiplusProcessor">
    <parameter name="Algorithms" type="stringVec">TrainMVA</parameter>
    <parameter name="ReadSubdetectorEnergies" type="int" value="0"/> <!-- true for ILD -->
    <parameter name="PFOCollection" type="string" value="PandoraPFOCollection" />
    <parameter name="MagneticField" type="float" value="5"/>

    <!-- directory, file prefix and book name under which the trained weights are stored -->
    <parameter name="FlavorTag.WeightsDirectory" type="string" value="/u/ey/homer/sidhome/lcfi/steering/weights-350" />
    <parameter name="FlavorTag.WeightsPrefix" type="string" value="flavwgts" />
    <parameter name="FlavorTag.BookName" type="string" value="bdt" />

    <!-- Category 1: jets selected by nvtx==0 -->
    <parameter name="FlavorTag.CategoryDefinition1" type="string">nvtx==0</parameter>
    <parameter name="FlavorTag.CategoryPreselection1" type="string">trk1d0sig!=0</parameter>
    <parameter name="FlavorTag.CategoryVariables1" type="stringVec">
      trk1d0sig trk2d0sig trk1z0sig trk2z0sig trk1pt_jete trk2pt_jete jprobr jprobz
    </parameter>
    <parameter name="FlavorTag.CategorySpectators1" type="stringVec">
      aux nvtx
    </parameter>

    <!-- Category 2: nvtx==1 and nvtxall==1.
         A literal ampersand is not allowed in XML text, so the logical AND is written
         with the amp entity; the parser hands the decoded expression to the processor. -->
    <parameter name="FlavorTag.CategoryDefinition2" type="string">nvtx==1&amp;&amp;nvtxall==1</parameter>
    <parameter name="FlavorTag.CategoryPreselection2" type="string">trk1d0sig!=0</parameter>
    <parameter name="FlavorTag.CategoryVariables2" type="stringVec">
      trk1d0sig trk2d0sig trk1z0sig trk2z0sig trk1pt_jete trk2pt_jete jprobr jprobz
      vtxlen1_jete vtxsig1_jete vtxdirang1_jete vtxmom1_jete vtxmass1 vtxmult1 vtxmasspc vtxprob
      d0bprob d0cprob d0qprob z0bprob z0cprob z0qprob
      trkmass
    </parameter>
    <parameter name="FlavorTag.CategorySpectators2" type="stringVec">
      aux nvtx
    </parameter>

    <!-- Category 3: nvtx==1 and nvtxall==2 (ampersands escaped as in category 2) -->
    <parameter name="FlavorTag.CategoryDefinition3" type="string">nvtx==1&amp;&amp;nvtxall==2</parameter>
    <parameter name="FlavorTag.CategoryPreselection3" type="string">trk1d0sig!=0</parameter>
    <parameter name="FlavorTag.CategoryVariables3" type="stringVec">
      trk1d0sig trk2d0sig trk1z0sig trk2z0sig trk1pt_jete trk2pt_jete jprobr jprobz
      vtxlen1_jete vtxsig1_jete vtxdirang1_jete vtxmom1_jete vtxmass1 vtxmult1 vtxmasspc vtxprob
      1vtxprob vtxlen12all_jete vtxmassall
    </parameter>
    <parameter name="FlavorTag.CategorySpectators3" type="stringVec">
      aux nvtx
    </parameter>

    <!-- Category 4: nvtx>=2 (a bare '>' is legal in XML text and needs no escaping) -->
    <parameter name="FlavorTag.CategoryDefinition4" type="string">nvtx>=2</parameter>
    <parameter name="FlavorTag.CategoryPreselection4" type="string">trk1d0sig!=0</parameter>
    <parameter name="FlavorTag.CategoryVariables4" type="stringVec">
      trk1d0sig trk2d0sig trk1z0sig trk2z0sig trk1pt_jete trk2pt_jete jprobr jprobz
      vtxlen1_jete vtxsig1_jete vtxdirang1_jete vtxmom1_jete vtxmass1 vtxmult1 vtxmasspc vtxprob
      vtxlen2_jete vtxsig2_jete vtxdirang2_jete vtxmom2_jete vtxmass2 vtxmult2
      vtxlen12_jete vtxsig12_jete vtxdirang12_jete vtxmom_jete vtxmass vtxmult
      1vtxprob
    </parameter>
    <parameter name="FlavorTag.CategorySpectators4" type="stringVec">
      aux nvtx
    </parameter>

    <!-- TMVA booking: method type BDT; the option string requests 1000 trees with BoostType=Grad -->
    <parameter name="TrainMVA.Verbose" type="bool" value="true" />
    <parameter name="TrainMVA.BookType" type="string" value="BDT" />
    <parameter name="TrainMVA.BookOptions" type="string">
      !H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8
    </parameter>

    <!-- training n-tuples, one ROOT file per class (B, C, O), each read from the tree named "ntp" -->
    <parameter name="TrainMVA.InputRootFileB" type="string" value="tupparts/bb350ntp.root" />
    <parameter name="TrainMVA.InputRootFileC" type="string" value="tupparts/cc350ntp.root" />
    <parameter name="TrainMVA.InputRootFileO" type="string" value="tupparts/qq350ntp.root" />
    <parameter name="TrainMVA.TreeNameB" type="string" value="ntp" />
    <parameter name="TrainMVA.TreeNameC" type="string" value="ntp" />
    <parameter name="TrainMVA.TreeNameO" type="string" value="ntp" />
  </processor>
</marlin>
|
Code Block |
---|
Results:
Code Block |
---|
The training/testing event counts used were:
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : Number of training and testing events after rescaling:
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : ------------------------------------------------------
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : jetB -- training entries : 14418 (sum of weights: 14418)
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : jetB -- testing entries : 14419 (sum of weights: 14419)
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : jetB -- training and testing entries: 28837 (sum of weights: 28837)
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : jetC -- training entries : 30296 (sum of weights: 30296)
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : jetC -- testing entries : 30297 (sum of weights: 30297)
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : jetC -- training and testing entries: 60593 (sum of weights: 60593)
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : jetO -- training entries : 85074 (sum of weights: 85074)
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : jetO -- testing entries : 85074 (sum of weights: 85074)
[ VERBOSE "MyLcfiplusProcessor"] --- DataSetFactory : jetO -- training and testing entries: 170148 (sum of weights: 170148)
The performance for each BDT category is shown below:
Category 0:
: Evaluation results ranked by best signal efficiency times signal purity :
--------------------------------------------------------------------------------
: MVA Method    jetB     jetC     jetO   :
--------------------------------------------------------------------------------
: bdt           0.286    0.272    0.750  :
--------------------------------------------------------------------------------
Category 1:
: Evaluation results ranked by best signal efficiency times signal purity :
--------------------------------------------------------------------------------
: MVA Method    jetB     jetC     jetO   :
--------------------------------------------------------------------------------
: bdt           0.715    0.673    0.266  :
--------------------------------------------------------------------------------
Category 2:
: Evaluation results ranked by best signal efficiency times signal purity :
--------------------------------------------------------------------------------
: MVA Method    jetB     jetC     jetO   :
--------------------------------------------------------------------------------
: bdt           0.940    0.264    0.151  :
--------------------------------------------------------------------------------
Category 3:
: Evaluation results ranked by best signal efficiency times signal purity :
--------------------------------------------------------------------------------
: MVA Method    jetB     jetC     jetO   :
--------------------------------------------------------------------------------
: bdt           0.983    0.143    0.068  :
--------------------------------------------------------------------------------
where categories 0-3 correspond, in order, to CategoryDefinition1-4 in the steering file:
[noric02] ~/sidhome/lcfi/steering $ grep CategoryDefinition training-350.xml
<parameter name="FlavorTag.CategoryDefinition1" type="string">nvtx==0</parameter>
<parameter name="FlavorTag.CategoryDefinition2" type="string">nvtx==1&&nvtxall==1</parameter>
<parameter name="FlavorTag.CategoryDefinition3" type="string">nvtx==1&&nvtxall==2</parameter>
<parameter name="FlavorTag.CategoryDefinition4" type="string">nvtx>=2</parameter>
The training output files are in:
/nfs/slac/g/lcd/mc/prj/users/homer/lcfi/steering/weights-350
total 112192
-rw-r--r-- 1 homer ey 5711212 Jun 30 21:39 flavwgts_c0_bdt.weights.xml
-rw-r--r-- 1 homer ey 522220 Jun 30 21:39 flavwgts_c0_bdt.class.C
-rw-r--r-- 1 homer ey 31472363 Jun 30 22:11 flavwgts_c0.root
-rw-r--r-- 1 homer ey 5784088 Jun 30 22:28 flavwgts_c1_bdt.weights.xml
-rw-r--r-- 1 homer ey 529682 Jun 30 22:28 flavwgts_c1_bdt.class.C
-rw-r--r-- 1 homer ey 20522889 Jun 30 22:39 flavwgts_c1.root
-rw-r--r-- 1 homer ey 5750585 Jun 30 22:46 flavwgts_c2_bdt.weights.xml
-rw-r--r-- 1 homer ey 527296 Jun 30 22:46 flavwgts_c2_bdt.class.C
-rw-r--r-- 1 homer ey 26826174 Jun 30 22:50 flavwgts_c2.root
-rw-r--r-- 1 homer ey 5660157 Jun 30 22:58 flavwgts_c3_bdt.weights.xml
-rw-r--r-- 1 homer ey 523184 Jun 30 22:58 flavwgts_c3_bdt.class.C
-rw-r--r-- 1 homer ey 10526460 Jun 30 23:02 flavwgts_c3.root
|