diff --git a/.gitignore b/.gitignore
index 1a79a52..ab3dbf2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 *.pyc
 FeynRules/*
 *.log
-ehdecay.in
\ No newline at end of file
+ehdecay.in
+# vi temp files
+*.swp
diff --git a/dihiggs/TStest.py b/dihiggs/TStest.py
index b968ddb..ff8fe85 100755
--- a/dihiggs/TStest.py
+++ b/dihiggs/TStest.py
@@ -1,28 +1,43 @@
 #!/usr/bin/env python
 import numpy as np
 import os
 from python.functions import reweighter_from_histogram_and_file
 from python.functions import AnalyticalReweighter
 #os.system("python python/functions.py")
 #scriptpath = "python/functions.py"
 # Add the directory containing your module to the Python path (wants absolute paths)
 #import sys
 #sys.path.append(os.path.abspath("python/functions.py"))
 #import AnalyticalReweighter
 import argparse

 parser = argparse.ArgumentParser(prog='TStest', description='Return the closest shape benchmark topology of an EFT point')
 # arguments to choose the (B)SM point in training and application
 parser.add_argument('--kl', type=float, default=1.0, help='kl of the EFT point to test')
 parser.add_argument('--kt', type=float, default=1.0, help='kt of the EFT point to test')
 parser.add_argument('--c2', type=float, default=0.0, help='c2 of the EFT point to test')
 parser.add_argument('--cg', type=float, default=0.0, help='cg of the EFT point to test')
 parser.add_argument('--c2g', type=float, default=0.0, help='c2g of the EFT point to test')
 args = parser.parse_args()

 ar = reweighter_from_histogram_and_file()
-BM = ar.TS_test(args.kl, args.kt, args.c2, args.cg, args.c2g)
+
+# First test example: find which benchmark point is the closest to the point passed as argument
+BM, TS = ar.TS_test(args.kl, args.kt, args.c2, args.cg, args.c2g)
+print("closest BM is # {} with TS {}".format(BM, TS))
+
+# Second test example: find the point in the kl scan that is the closest to each BM point
+for ibm, bm in enumerate(ar.JHEP_BM):
+    bm_kl, bm_kt, bm_c2, bm_cg, bm_c2g = bm
+    kl_scan = []
+    for ikl in range(-200, 201):
+        kl = ikl / 10.
+        kt = 1.
+        kl_scan.append((kl, kt, 0., 0., 0.))
+    BM, TS = ar.find_closest_points(bm_kl, bm_kt, bm_c2, bm_cg, bm_c2g, kl_scan)
+    coordinates = [kl_scan[x] for x in BM]
+    print("closest to BM # {} {} are points {} with TS {}".format(ibm, bm, coordinates, TS[0]))
diff --git a/dihiggs/python/functions.py b/dihiggs/python/functions.py
index 21f73d1..be88602 100644
--- a/dihiggs/python/functions.py
+++ b/dihiggs/python/functions.py
@@ -1,174 +1,201 @@
 from __future__ import print_function
 import numpy as np
 #import array as array
 from array import array
 import math
-from scipy.special import factorial, gammaln
+from scipy.special import gammaln
 #from root_numpy import hist2array

 class AnalyticalReweighter(object):
-    def __init__(self, bins, ref_effs, den_effs, bin_coefs, xs_coefs):
-        self.bins = bins
-        self.ref_effs = ref_effs
-        self.den_effs = den_effs
-        self.bin_coefs = bin_coefs
-        self.xs_coefs = xs_coefs
-
-        # load benchmarks
-        self.klJHEP=[1.0, 7.5, 1.0, 1.0, -3.5, 1.0, 2.4, 5.0, 15.0, 1.0, 10.0, 2.4, 15.0]
-        self.ktJHEP=[1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0]
-        self.c2JHEP=[0.0, -1.0, 0.5, -1.5, -3.0, 0.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 1.0]
-        self.cgJHEP=[0.0, 0.0, -0.8, 0.0, 0.0, 0.8, 0.2, 0.2, -1.0, -0.6, 0.0, 1.0, 0.0]
-        self.c2gJHEP=[0.0, 0.0, 0.6, -0.8, 0.0, -1.0, -0.2,-0.2, 1.0, 0.6, 0.0, -1.0, 0.0]
-        print ("initialize")
-
-    """
-    Class to do event reweighting based on a fitted analytical formula.
+    def __init__(self, bins, ref_effs, den_effs, bin_coefs, xs_coefs):
+        self.bins = bins
+        self.ref_effs = ref_effs
+        self.den_effs = den_effs
+        self.bin_coefs = bin_coefs
+        self.xs_coefs = xs_coefs
+
+        # load benchmarks
+        self.klJHEP=[1.0, 7.5, 1.0, 1.0, -3.5, 1.0, 2.4, 5.0, 15.0, 1.0, 10.0, 2.4, 15.0]
+        self.ktJHEP=[1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0]
+        self.c2JHEP=[0.0, -1.0, 0.5, -1.5, -3.0, 0.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 1.0]
+        self.cgJHEP=[0.0, 0.0, -0.8, 0.0, 0.0, 0.8, 0.2, 0.2, -1.0, -0.6, 0.0, 1.0, 0.0]
+        self.c2gJHEP=[0.0, 0.0, 0.6, -0.8, 0.0, -1.0, -0.2, -0.2, 1.0, 0.6, 0.0, -1.0, 0.0]
+        # same benchmarks as 5-tuples, for easier argument passing
+        self.JHEP_BM = []
+        for i in range(len(self.klJHEP)):
+            self.JHEP_BM.append((self.klJHEP[i], self.ktJHEP[i], self.c2JHEP[i], self.cgJHEP[i], self.c2gJHEP[i]))
+        print ("initialize")
+
+    """
+    Class to do event reweighting based on a fitted analytical formula.
     Args:
         bins (list): list of array-like with bin edges used
         ref_effs (array-like): multi-dim array with reference efficiency for each bin (e.g. SM).
         den_effs (array-like): multi-dim array with denominator efficiency for each bin (e.g. SM).
         bin_coefs (array-like): multi-dim array with the analytical formula coefficients per bin
         xs_coefs (array-like): 1D array with the coefficients for the cross sections
-    """
-
-    @staticmethod
-    def analytical_formula(kl, kt, c2, cg, c2g, A):
-        return A[0]*kt**4 + A[1]*c2**2 + (A[2]*kt**2 + A[3]*cg**2)*kl**2 \
+    """
+
+    @staticmethod
+    def analytical_formula(kl, kt, c2, cg, c2g, A):
+        return A[0]*kt**4 + A[1]*c2**2 + (A[2]*kt**2 + A[3]*cg**2)*kl**2 \
             + A[4]*c2g**2 + ( A[5]*c2 + A[6]*kt*kl )*kt**2 + (A[7]*kt*kl \
             + A[8]*cg*kl )*c2 + A[9]*c2*c2g + (A[10]*cg*kl \
             + A[11]*c2g)*kt**2 + (A[12]*kl*cg + A[13]*c2g )*kt*kl \
             + A[14]*cg*c2g*kl
-    """
-    Returns the R value according to the fitted analytical formula.
+    """
+    Returns the R value according to the fitted analytical formula.
     Returns:
         An array with the evaluation of the formula for those parameters, might have to be transposed to recover the original shape.
     """
-
-    def parameter_point(self, kl, kt, c2, cg, c2g):
-        #precompute formula results for all bins and xs
-        # transpose to have coefs in external index
-        self.bin_results = self.analytical_formula(kl, kt, c2, cg, c2g,
+
+    def parameter_point(self, kl, kt, c2, cg, c2g):
+        # precompute formula results for all bins and xs
+        # transpose to have coefs in external index
+        self.bin_results = self.analytical_formula(kl, kt, c2, cg, c2g,
                                                    self.bin_coefs.T).T
-        self.xs_results = self.analytical_formula(kl, kt, c2, cg, c2g,
+        self.xs_results = self.analytical_formula(kl, kt, c2, cg, c2g,
                                                   self.xs_coefs.T).T
-        # compute numerator efficiency and total normalization
-        self.num_effs = self.ref_effs*self.bin_results/self.xs_results
-        self.Cnorm = self.num_effs.sum()
-        # compute weight per bin (normalized by sum)
-        self.bin_weights = (self.num_effs/self.den_effs)/self.Cnorm
-        bin_weights_ret = (self.ref_effs*self.bin_results/self.xs_results)/self.Cnorm
-        return bin_weights_ret
-    """ Precompute all required information for a certain EFT point.
+        # compute numerator efficiency and total normalization
+        self.num_effs = self.ref_effs*self.bin_results/self.xs_results
+        self.Cnorm = self.num_effs.sum()
+        # compute weight per bin (normalized by sum)
+        self.bin_weights = (self.num_effs/self.den_effs)/self.Cnorm
+        bin_weights_ret = (self.ref_effs*self.bin_results/self.xs_results)/self.Cnorm
+        return bin_weights_ret
+    """ Precompute all required information for a certain EFT point.
     Note: It has to be called before the find_bin and weight methods.
     Args:
         kl (float): EFT parameter
         kt (float): EFT parameter
         c2 (float): EFT parameter
         cg (float): EFT parameter
         c2g (float): EFT parameter
-    """
-
-    def TS_test(self, kl, kt, c2, cg, c2g):
-        print ("Calculating TS")
-        normEv = 1200000000
-        TS = np.zeros(13)
-        EvByBin = np.absolute(np.rint(normEv*self.parameter_point( kl, kt, c2, cg, c2g)))
-        logPoint = np.float64(np.log(np.float64(gammaln(EvByBin))))
-        # factorial gives overflow, so we use gammaln as approximation
-        for bench in range(0,13) :
-            EvByBinBM = np.absolute(normEv*self.parameter_point(self.klJHEP[bench], self.ktJHEP[bench], self.c2JHEP[bench],self.cgJHEP[bench], self.c2gJHEP[bench] ))
-            #print (bench,self.Cnorm,EvByBin.sum(),EvByBinBM.sum())
-            logBM = np.log(np.float64(gammaln(EvByBinBM))) # last bins are giving negative = increase fit precision
-            NSumInt = np.rint((EvByBin+EvByBinBM)/2)
-            logSum = np.float64(np.log(np.float64(gammaln(NSumInt))))
-            test = np.float64(-2*(logPoint + logBM -2*logSum ))
-            TS[bench] = test.sum()
-        #print (np.absolute(TS))
-        minTS = np.argmin(TS)
-        print ("Closest benchmark is:",minTS)
-        return int(minTS)
-
-
-    def find_bin(self, variables):
-        bin_ids = np.empty_like(variables, dtype=np.int64)
-        for i_v, bins_arr in enumerate(self.bins):
-            # substract 1 so bins start in 0
-            bin_ids[:,i_v] = np.digitize(variables[:,i_v], bins=bins_arr) - 1
-        return bin_ids
+    """
+
+
+    def compute_TS(self, kl_1, kt_1, c2_1, cg_1, c2g_1, kl_2, kt_2, c2_2, cg_2, c2g_2):
+        """
+        Compute the test statistic (TS) defined in https://arxiv.org/pdf/1507.02245v4.pdf
+        for a pair of points in the 5D EFT parameter space
+        """
+        normEv = 1200000000
+        # the TS is <= 0 by construction: the closer to 0, the more similar the two binned shapes
+        EvByBin_1 = normEv * self.parameter_point(kl_1, kt_1, c2_1, cg_1, c2g_1)
+        EvByBin_1 = np.absolute(np.rint(EvByBin_1))  # round to non-negative integer counts
+        log_n1 = gammaln(EvByBin_1)  # gammaln(n) = log((n-1)!): the log-factorial without the overflow of factorial
+        EvByBin_2 = normEv * self.parameter_point(kl_2, kt_2, c2_2, cg_2, c2g_2)
+        EvByBin_2 = np.absolute(np.rint(EvByBin_2))  # abs() guards against last bins going negative from limited fit precision
+        log_n2 = gammaln(EvByBin_2)
+        EvByBin_3 = (EvByBin_1 + EvByBin_2) / 2
+        log_n3 = gammaln(EvByBin_3)
+        test = np.float64(log_n1 + log_n2 - 2 * log_n3)
+        TS = -2 * test.sum()
+        return TS
+
+
+    def find_closest_points(self, kl, kt, c2, cg, c2g, tuple_list):
+        """
+        Loop over a list of points to find the closest match, with the TS as a metric.
+        Returns the list of indices of the best matches within the list and the corresponding TS values.
+        """
+        TS = np.zeros(len(tuple_list))
+        for point in range(len(tuple_list)):
+            TS[point] = self.compute_TS(kl, kt, c2, cg, c2g, *tuple_list[point])
+        # the TS is always <= 0, so the maximum (closest to zero) marks the best match
+        list_allmaxs = np.where(TS == TS.max())[0].tolist()
+        list_allTS = [TS[p] for p in list_allmaxs]
+        return list_allmaxs, list_allTS
+
+
+    def TS_test(self, kl, kt, c2, cg, c2g):
+        """
+        Loop over the list of benchmark points to find the closest match, with the TS as a metric
+        """
+        return self.find_closest_points(kl, kt, c2, cg, c2g, self.JHEP_BM)
+
+
+    def find_bin(self, variables):
+        bin_ids = np.empty_like(variables, dtype=np.int64)
+        for i_v, bins_arr in enumerate(self.bins):
+            # subtract 1 so bins start at 0
+            bin_ids[:,i_v] = np.digitize(variables[:,i_v], bins=bins_arr) - 1
+        return bin_ids
     """ Finds the bins corresponding to each event.
     Args:
         variable (np.array): 2-D float np.array of shape (n_events, n_rw_vars)
     Returns:
         A 2D int64 array of shape (n_events, n_rw_vars) with the indexes for each event obtained using np.digitize for each variable.
     """
-    def weight(self, variables):
-        bin_ids = self.find_bin(variables)
-        weights = self.bin_weights[[ bin_ids[:,i_v] for i_v in range(bin_ids.shape[1])]]
-        """ Finds the weight corresponding to each event.
+
+    def weight(self, variables):
+        bin_ids = self.find_bin(variables)
+        # index with a tuple of per-axis index arrays (a list of arrays is deprecated in newer numpy)
+        weights = self.bin_weights[tuple(bin_ids[:,i_v] for i_v in range(bin_ids.shape[1]))]
+        """ Finds the weight corresponding to each event.
     Args:
         variable (np.array): 2-D float np.array of shape (n_events, n_rw_vars)
     Returns:
         A 1D float array of shape (n_events,) with the corresponding weight per event.
-        """
-        return weights
+        """
+        return weights
+

 def reweighter_from_histogram_and_file():
-    bins = [([ 250., 260., 270., 280., 290., 300., 310.,
+    bins = [([ 250., 260., 270., 280., 290., 300., 310.,
               320., 330., 340., 350., 360., 370., 380., 390., 400.,
               410., 420., 430., 440., 450., 460., 470., 480., 490.,
               500., 510., 520., 530., 540., 550., 560., 570., 580.,
               590., 600., 610., 620., 630., 640., 650., 660., 670.,
               680., 690., 700., 750., 800., 850., 900., 950., 1000.,
               1100., 1200., 1300., 1400., 1500., 1750., 2000., 50000.]),
             ([ 0., 0.40000001, 0.60000002, 0.80000001, 1. ])]
-    n_ev_V0_sum=np.loadtxt('data/n_ev_V0_sum_59_4.out')
-    den_effs=n_ev_V0_sum/n_ev_V0_sum.sum()
-
-    column_names = ["n_samples"]
-    var_center_names = [ "mhh_center", "costhst_center"]
-    column_names += var_center_names
-    n_ev_names = ["n_ev_SM", "n_ev_JHEP_sum"]
-    column_names += n_ev_names
-    coef_names = ["A_{}".format(i) for i in range(0,15)]
-    column_names += coef_names
-    coef_err_names = ["A_err_{}".format(i) for i in range(0,15)]
-    column_names += coef_err_names
-
-    data_from_file = np.genfromtxt("data/coefficientsByBin_extended_3M_costHHSim_59-4.txt", names = column_names)
-
-    # asumming consistent ordering in text file (could digitize otherwise)
-    n_ev_SM = data_from_file[n_ev_names[0]].reshape(den_effs.shape)
-    ref_effs = n_ev_SM/n_ev_SM.sum()
-
-    bin_coefs = data_from_file[coef_names].view(np.float)\
+    n_ev_V0_sum=np.loadtxt('data/n_ev_V0_sum_59_4.out')
+    den_effs=n_ev_V0_sum/n_ev_V0_sum.sum()
+
+    column_names = ["n_samples"]
+    var_center_names = [ "mhh_center", "costhst_center"]
+    column_names += var_center_names
+    n_ev_names = ["n_ev_SM", "n_ev_JHEP_sum"]
+    column_names += n_ev_names
+    coef_names = ["A_{}".format(i) for i in range(0,15)]
+    column_names += coef_names
+    coef_err_names = ["A_err_{}".format(i) for i in range(0,15)]
+    column_names += coef_err_names
+
+    data_from_file = np.genfromtxt("data/coefficientsByBin_extended_3M_costHHSim_59-4.txt", names = column_names)
+
+    # assuming consistent ordering in text file (could digitize otherwise)
+    n_ev_SM = data_from_file[n_ev_names[0]].reshape(den_effs.shape)
+    ref_effs = n_ev_SM/n_ev_SM.sum()
+
+    bin_coefs = data_from_file[coef_names].view(np.float64)\
                     .reshape(den_effs.shape+(len(coef_names),))
-    # hardcoded coeficients for total cross section
-    xs_coefs = np.array([2.09078, 10.1517, 0.282307, 0.101205, 1.33191,
+    # hardcoded coefficients for total cross section
+    xs_coefs = np.array([2.09078, 10.1517, 0.282307, 0.101205, 1.33191,
                         -8.51168, -1.37309, 2.82636, 1.45767, -4.91761,
                         -0.675197, 1.86189, 0.321422, -0.836276, -0.568156])
-    analytical_reweighter = AnalyticalReweighter(bins, ref_effs, den_effs, bin_coefs, xs_coefs)
+    analytical_reweighter = AnalyticalReweighter(bins, ref_effs, den_effs, bin_coefs, xs_coefs)

-    return analytical_reweighter
+    return analytical_reweighter
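
As a sanity check on the new metric, here is a minimal standalone sketch of the TS computed by compute_TS above (not part of the patch; the name toy_TS and all counts are made up for illustration). It shows the two properties the code relies on: gammaln(n) = log Gamma(n) = log((n-1)!), so the log-factorial terms are evaluated in log space without the overflow that scipy.special.factorial hits for large counts; and, since gammaln is convex, the TS is always <= 0 and reaches 0 only for identical binned shapes, which is why find_closest_points takes the maximum.

import numpy as np
from scipy.special import gammaln

def toy_TS(n1, n2):
    # mirror compute_TS: round to non-negative integer counts per bin
    n1 = np.absolute(np.rint(n1))
    n2 = np.absolute(np.rint(n2))
    n3 = (n1 + n2) / 2.
    # gammaln is convex, so gammaln(n1) + gammaln(n2) >= 2*gammaln(n3)
    # bin by bin, and the result below is always <= 0
    return -2 * (gammaln(n1) + gammaln(n2) - 2 * gammaln(n3)).sum()

counts = np.array([1.0e6, 5.0e5, 2.0e5])  # hypothetical bin counts
print(toy_TS(counts, counts))             # TS = 0 for identical shapes (may print as -0.0)
print(toy_TS(counts, counts * np.array([1.0, 1.2, 0.8])))  # negative: the more the shapes differ, the more negative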
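
Similarly, the tuple-based fancy indexing in AnalyticalReweighter.weight can be checked in isolation. A sketch with made-up numbers (the table shape and indices are purely illustrative): one index array per axis, packed in a tuple, picks exactly one entry of the multi-dimensional bin_weights table per event, whereas passing a list of index arrays is deprecated in newer numpy versions.

import numpy as np

bin_weights = np.arange(12.).reshape(3, 4)  # hypothetical (mhh_bin, costhst_bin) weight table
bin_ids = np.array([[0, 1],                 # event 0 falls in bins (0, 1)
                    [2, 3],                 # event 1 falls in bins (2, 3)
                    [1, 0]])                # event 2 falls in bins (1, 0)
# one index array per axis -> one weight per event, shape (n_events,)
weights = bin_weights[tuple(bin_ids[:, i] for i in range(bin_ids.shape[1]))]
print(weights)  # [ 1. 11.  4.]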