Index: trunk/Sample_signal.root
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: trunk/Sample_signal.root
===================================================================
--- trunk/Sample_signal.root (revision 1)
+++ trunk/Sample_signal.root (revision 2)

Property changes on: trunk/Sample_signal.root
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property
Index: trunk/Sample_background.root
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: trunk/Sample_background.root
===================================================================
--- trunk/Sample_background.root (revision 1)
+++ trunk/Sample_background.root (revision 2)

Property changes on: trunk/Sample_background.root
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property
Index: trunk/TreeCreator.py
===================================================================
--- trunk/TreeCreator.py (revision 0)
+++ trunk/TreeCreator.py (revision 2)
@@ -0,0 +1,100 @@
+import sys
+import ROOT
+import ObsDict
+from array import array
+
+def _degenerateJets( jet1, jet2 ):
+    """Tell whether a jet pair would make an observable from ObsDict.py divide by zero."""
+    return ( ( jet1.E() == - jet2.E() )
+             or ( jet1.Px() == jet2.Px() and jet1.Py() == jet2.Py() )
+             or ( jet1.Eta() == jet2.Eta() ) )
+
+def _reprocessFile( inputName, outputName ):
+    """Convert the jet and boson TTrees of one input file into an observables TTree.
+
+    inputName  -- ROOT file that holds "jetsTree" and "bosonTree"
+    outputName -- ROOT file to (re)create; it receives "observablesTree" with
+                  one float branch per entry of ObsDict.obsList
+
+    Raises ValueError if the input file or one of its trees cannot be read,
+    or if the two input trees hold a different number of events.
+    """
+    inputFile = ROOT.TFile(inputName, "read")
+    try:
+        if inputFile.IsZombie():
+            raise ValueError("Cannot open the input file " + inputName + "!")
+        inputJetsTree = inputFile.Get("jetsTree")
+        inputBosonTree = inputFile.Get("bosonTree")
+        if not inputJetsTree or not inputBosonTree:
+            raise ValueError("jetsTree or bosonTree is missing in " + inputName + "!")
+        if inputJetsTree.GetEntries() != inputBosonTree.GetEntries():
+            raise ValueError("The number of events is different in jets and boson TTrees! Check consistency of the samples!")
+
+        #The output file is opened right before the TTree is created, so that the new tree is attached to it
+        outputFile = ROOT.TFile(outputName, "recreate")
+        try:
+            tree = ROOT.TTree("observablesTree", "observablesTree")
+
+            #Every branch gets its own buffer; the dictionary keeps all of them
+            #alive for as long as the tree reads from them
+            values = {}
+            for var in ObsDict.obsList: #Looping over the observables we want to construct (see ObsDict.py)
+                print(" Variable " + str(var) + " is calculating...")
+                values[var] = array('f', [0.]) #Creating a branch variable
+                tree.Branch(var, values[var], var + "/F") #Creating the branch itself
+
+            #All branches are filled together, once per event, so one entry of
+            #the output tree corresponds to one accepted event
+            for i in range(inputJetsTree.GetEntries()): #Looping over the events in input trees
+                inputJetsTree.GetEntry(i)
+                jet1 = ROOT.TLorentzVector() #Defining two TLorentzVector jets and
+                jet2 = ROOT.TLorentzVector() #filling them from the input trees
+                jet1.SetPxPyPzE(inputJetsTree.vPx[0], inputJetsTree.vPy[0], inputJetsTree.vPz[0], inputJetsTree.vE[0])
+                jet2.SetPxPyPzE(inputJetsTree.vPx[1], inputJetsTree.vPy[1], inputJetsTree.vPz[1], inputJetsTree.vE[1])
+                if _degenerateJets(jet1, jet2): continue
+                inputBosonTree.GetEntry(i)
+                boson = ROOT.TLorentzVector()
+                boson.SetPxPyPzE(inputBosonTree.vPx[0], inputBosonTree.vPy[0], inputBosonTree.vPz[0], inputBosonTree.vE[0])
+                for var in ObsDict.obsList:
+                    values[var][0] = ObsDict.observables(jet1, jet2, boson, var) #Calculating the observable from the four-vectors
+                tree.Fill() #Filling the event in new tree
+
+            outputFile.Write() #Writing the tree in the output file
+        finally:
+            outputFile.Close() #Close the output file
+    finally:
+        inputFile.Close()
+
+def processTrees( names = None ):
+    """Reprocess the signal, background and data samples into observables TTrees.
+
+    The input files are taken from sys.argv[1], sys.argv[2] and sys.argv[3];
+    they are written out as Reproc_Signal.root, Reproc_Background.root and
+    Reproc_Data.root respectively.
+
+    names -- names of the branches read from the input trees, used in the
+             log message only; defaults to ["vPx", "vPy", "vPz", "vE"]
+
+    Raises ValueError if fewer than three file names are given on the command
+    line or if one of the inputs cannot be reprocessed.
+    """
+    if names is None: #A fresh list for every call instead of a shared mutable default
+        names = ["vPx", "vPy", "vPz", "vE"]
+    if len(sys.argv) < 4:
+        raise ValueError("You must specify signal, background and data filenames!")
+
+    print("=======> Initializing TTree reprocessor...")
+    print("=======> Reading branches " + str(names) + "...")
+
+    #The files that will be reprocessed; a tuple (unlike a dictionary) keeps
+    #every label next to its own file name and fixes the processing order
+    samples = ( ("Signal", sys.argv[1]), ("Background", sys.argv[2]), ("Data", sys.argv[3]) )
+
+    print("=======> Creating input TTrees...")
+    #Creating new files, that will be used for classification
+    #These files will contain new branches, which correspond to the observables indicated in ObsDict.py
+    for label, fileName in samples: #Looping over the files (signal, background and data)
+        print("=======> File " + fileName + " is being reproduced...")
+        _reprocessFile(fileName, "Reproc_" + label + ".root")
+        print("=======> Done")
+    print("=======> Input TTrees have been created!")
\ No newline at end of file
Index: trunk/Classifier.py
===================================================================
--- trunk/Classifier.py (revision 0)
+++ trunk/Classifier.py (revision 2)
@@ -0,0 +1,108 @@
+import sys
+import ROOT
+import ObsDict
+import TreeCreator
+from array import array
+from ROOT import gROOT, TMVA, TMath, TFile, TLorentzVector
+
+#Main function of the classifier.
+def main():
+    """Train, test and evaluate the BDT on the reprocessed samples.
+
+    Command line arguments: signal file, background file, data file, True|False.
+    With "True" the input files are reprocessed by TreeCreator.processTrees()
+    first; with "False" the Reproc_*.root files must exist already. The TMVA
+    output is written to Output_BDT.root.
+
+    Returns 0. Raises ValueError if the command line is incomplete.
+    """
+    #Check that user has indicated filenames. The length is tested first, because
+    #reading a missing argument would raise IndexError instead of this message
+    if len(sys.argv) < 4 or "" in sys.argv[1:4]:
+        raise ValueError("You must specify signal, background and data filenames!")
+
+    #If user provided False as a fourth argument, TTree reprocessing will not be performed
+    if len(sys.argv) != 5:
+        raise ValueError("You must specify the reprocessing flag at the end of the command line (True/False)")
+    if sys.argv[4] == "True":
+        TreeCreator.processTrees() #This function is launching TTree reprocessing, i.e. Px, Py, Pz, E -> mjj, pTjj, ...
+    elif sys.argv[4] != "False":
+        raise ValueError("You must specify the correct reprocessing flag (True/False)")
+
+    print("=======> Launching BDT classifier...")
+
+    #The definition of reprocessed input TTrees
+    trainSignal = ROOT.TFile("Reproc_Signal.root", "read") #VBF
+    trainBackgr = ROOT.TFile("Reproc_Background.root", "read") #ggF
+    Data = ROOT.TFile("Reproc_Data.root", "read")
+
+    #Define the output file
+    outputFile = ROOT.TFile("Output_BDT.root", "recreate")
+    outputName = outputFile.GetName() #Remembered now, it is still needed after the file is closed
+
+    #Taking the TTrees from the input files
+    treeSignal = trainSignal.Get("observablesTree")
+    treeBackgr = trainBackgr.Get("observablesTree")
+    treeData = Data.Get("observablesTree")
+
+    #Define the TMVA factory object
+    factory = TMVA.Factory("TMVAJob", outputFile)
+
+    #Signal and background weights that will be used for training
+    signalWeight = 1.0
+    backgrWeight = 1.0
+
+    #Adding signal, background and regression trees in the factory
+    #NOTE(review): the data sample is booked as a regression tree next to the
+    #signal/background trees - confirm this mixed setup is what TMVA should get
+    factory.AddSignalTree(treeSignal, signalWeight)
+    factory.AddBackgroundTree(treeBackgr, backgrWeight)
+    factory.AddRegressionTree(treeData, signalWeight)
+
+    #Apply additional cuts on the signal and background samples (can be different)
+    mycuts = ""
+    mycutb = ""
+
+    #Tell the factory how to use the training and testing events
+    #TODO: the explicit splitting below is disabled, so mycuts/mycutb are not applied yet
+    #factory.PrepareTrainingAndTestTree(mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V")
+
+    #Including all the variables that we indicated in ObsDict.py
+    #NOTE(review): every input variable is also declared as a target - confirm this is intended
+    for var in ObsDict.obsList:
+        factory.AddVariable(var, 'F')
+        factory.AddTarget(var, 'F')
+
+    #Booking the classifier method, it is possible to book several of them at once
+    factory.BookMethod( TMVA.Types.kBDT, "BDTG",
+                        "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedBoost:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50:MaxDepth=2" )
+
+    #Performing a training, testing and evaluation of all methods
+    factory.TrainAllMethods()
+    factory.TestAllMethods()
+    factory.EvaluateAllMethods()
+
+    outputFile.Close()
+
+    print("=======> Wrote root file: " + outputName)
+    print("=======> TMVAClassificationCategory is done!")
+
+    #Launching multi-class and regression GUI interfaces to check the plots (once each)
+    if not gROOT.IsBatch():
+        TMVA.TMVAMultiClassGui(outputName)
+        TMVA.TMVARegGui(outputName)
+
+        #The prompt is only needed while a GUI is open. raw_input() is preferred, because
+        #Python 2's input() evaluates the typed text and fails on a bare Enter
+        try:
+            waitForUser = raw_input
+        except NameError:
+            waitForUser = input
+        waitForUser("=======> Opening GUI interface. Press any key to continue.")
+
+    print("=======> Done!")
+
+    return 0
+
+if __name__ == "__main__":
+    main()
Index: trunk/Sample_data.root
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: trunk/Sample_data.root
===================================================================
--- trunk/Sample_data.root (revision 1)
+++ trunk/Sample_data.root (revision 2)

Property changes on: trunk/Sample_data.root
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property
Index: trunk/ObsDict.py
===================================================================
--- trunk/ObsDict.py (revision 0)
+++ trunk/ObsDict.py (revision 2)
@@ -0,0 +1,62 @@
+import ROOT
+from ROOT import TMath
+
+#Names of the observables we want to have in the reprocessed files
+obsList = ("pTBalance", "Mjj", "dEtajj", "dRjj", "Sin0p5dFi", "EtaCentrality", "pTCentrality", "Ejets")
+
+def _pTBalance( jet1, jet2, boson ):
+    #Magnitude of the summed transverse momentum over the scalar sum of the transverse momenta
+    px = jet1.Px() + jet2.Px() + boson.Px()
+    py = jet1.Py() + jet2.Py() + boson.Py()
+    return TMath.Sqrt( px*px + py*py )/( jet1.Pt() + jet2.Pt() + boson.Pt() )
+
+def _Mjj( jet1, jet2, boson ):
+    #Invariant mass of the two jets
+    return (jet1 + jet2).M()
+
+def _dEtajj( jet1, jet2, boson ):
+    #Absolute pseudorapidity difference of the jets
+    return TMath.Abs( jet1.Eta() - jet2.Eta() )
+
+def _dRjj( jet1, jet2, boson ):
+    #Distance of the jets in the eta-phi plane; DeltaR() wraps the azimuthal
+    #difference into the range of -pi to pi, which a plain Phi() subtraction does not
+    return jet1.DeltaR(jet2)
+
+def _Sin0p5dFi( jet1, jet2, boson ):
+    #Sine of half the azimuthal difference of the jets
+    return TMath.Sin( TMath.Abs( ( jet1.Phi() - jet2.Phi() )/2. ) )
+
+def _EtaCentrality( jet1, jet2, boson ):
+    #Boson pseudorapidity relative to the mean of the jets, in units of their separation
+    return TMath.Abs( ( boson.Eta() - ( jet1.Eta() + jet2.Eta() )/2. )/( jet1.Eta() - jet2.Eta() ) )
+
+def _pTCentrality( jet1, jet2, boson ):
+    #Boson transverse momentum relative to the mean of the jets, in units of their difference
+    px = boson.Px() - jet1.Px()/2. - jet2.Px()/2.
+    py = boson.Py() - jet1.Py()/2. - jet2.Py()/2.
+    dPx = jet1.Px() - jet2.Px()
+    dPy = jet1.Py() - jet2.Py()
+    return TMath.Sqrt( px*px + py*py )/TMath.Sqrt( dPx*dPx + dPy*dPy )
+
+def _Ejets( jet1, jet2, boson ):
+    #Scalar sum of the jet transverse momenta over the sum of the jet energies
+    return ( jet1.Pt() + jet2.Pt() )/( jet1.E() + jet2.E() )
+
+#One function per observable, in the order of obsList
+_obsFunctions = (_pTBalance, _Mjj, _dEtajj, _dRjj, _Sin0p5dFi, _EtaCentrality, _pTCentrality, _Ejets)
+
+#A function, that takes the four-vectors of two jets and of the boson as an input and
+#calculates the observable based on the provided key (obsName)
+def observables( jet1, jet2, boson, obsName ):
+    """Return the observable called obsName for the given four-vectors.
+
+    jet1, jet2, boson -- four-vectors of the two jets and of the boson
+    obsName           -- one of the names listed in obsList
+
+    Only the requested observable is evaluated. For a name that is not in
+    obsList a message is printed and None is returned.
+    """
+    if obsName in obsList: return _obsFunctions[obsList.index(obsName)](jet1, jet2, boson)
+    print("Variable " + ''.join(obsName) + " is not defined!")
+    return None
\ No newline at end of file