Page Menu
Home
HEPForge
Search
Configure Global Search
Log In
Files
F10664249
hhprofessor.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Subscribers
None
hhprofessor.py
View Options
#! /usr/bin/env python
#
# Author: Hendrik Hoeth <hendrik.hoeth@cern.ch>
#
# professor is a MC tuning tool, based on the ideas described in "Tuning and
# Test of Fragmentation Models Based on Identified Particles and Precision
# Event Shape Data" (Z. Phys., C 73 (1996) 11-60) and earlier implemented and
# used by the Delphi collaboration.
#
# The method and the original program was developed at the University of
# Wuppertal. Unfortunately the original Fortran code has grown in a historic
# way, depends on naglib for the singular value decomposition, uses hbook and
# zebra for all the I/O and doesn't run on current linux distributions anymore.
# So I decided to rewrite the program in python and use the HepML format for
# reading and writing distributions.
#
# professor.py depends on
#
# - Numeric (pygsl and SciPy depend on Numeric)
# - libgsl and pygsl (for the singular value decomposition)
# - SciPy (for the chi^2 minimization)
# - PyXML (for reading the data files)
#
# 1. Read the data and the generated MC distributions
# 2. Fit the X_MC (p_1, ..., p_n) function to get the coeff. for each bin
# 3. minimise the chi^2
#
# We need at least N = 1/2 * (n (n+3) + 2) parameter sets for the fit.
from
pygsl.linalg
import
*
from
pygsl._numobj
import
*
from
scipy.optimize
import
fmin
class
Distributions
:
def
__init__
(
self
):
self
.
parameters
()
self
.
read
()
def
read
(
self
):
"""
Instead of reading real data we just generate some toy numbers.
This needs to be fixed.
"""
data
=
[]
for
i
in
range
(
len
(
self
.
params
)):
row
=
[]
for
x
in
range
(
4
):
row
.
append
(
self
.
params
[
i
][
0
]
*
x
+
self
.
params
[
i
][
1
]
*
x
**
2
)
data
.
append
(
row
)
self
.
data
=
array
(
data
,
Float
)
def
output
(
self
):
print
self
.
data
def
parameters
(
self
):
None
class
Data
(
Distributions
):
def
parameters
(
self
):
self
.
params
=
array
([[
1.
,
2.
]],
Float
)
class
MC
(
Distributions
):
def
parameters
(
self
):
self
.
params
=
array
([
[
0.9
,
2.1
],
[
0.94
,
1.95
],
[
1.0
,
1.9
],
[
1.0
,
2.03
],
[
1.05
,
2.05
],
[
1.02
,
2.04
],
[
1.1
,
2.0
]],
Float
)
class
Prediction
:
def
__init__
(
self
,
data
,
mc
):
"""
In the class "Prediction" the Monte Carlo response is modelled
as a quadratic function of the parameters which are to be tuned.
The Monte Carlo response for each bin is described as
X_mc (p_1, ..., p_n) = A_0 + \sum_1^n B_i p_i + \sum_1^n C_i p_i^2
+ \sum_1^(n-1) \sum_(i+1)^n D_ij p_i p_j
p_1, ..., p_n are the parameters. The coefficients A_0, B_i, C_i, D_ij
are calculated using a singular value decomposition in calc_coeff().
get_prediction() uses these coefficients to predict the MC response
for any parameter setting.
"""
if
(
len
(
data
.
data
[
0
,:])
!=
len
(
mc
.
data
[
0
,:])):
print
"Data and MC have different number of bins!"
self
.
nsets
=
len
(
mc
.
params
[:,
0
])
self
.
nparam
=
len
(
mc
.
params
[
0
,:])
self
.
calc_pmatrix
()
self
.
calc_coeff
()
def
calc_pmatrixrow
(
self
,
params
):
"""
calc_pmatrixrow() takes a set of parameters and returns a vector
which contains:
[1 p_1 ... p_n p_1^2 ... p_n^2 p_1*p_2 p_1*p_3 ... p_(n-1)*p_n]
"""
# 1 (for A_0)
a
=
[
1.
]
# p_i (for B_i)
for
i
in
range
(
self
.
nparam
):
a
.
append
(
params
[
i
])
# p_i^2 (for C_i)
for
i
in
range
(
self
.
nparam
):
a
.
append
(
params
[
i
]
**
2
)
# p_i * p_j (for D_ij)
for
i
in
range
(
self
.
nparam
-
1
):
for
j
in
range
(
i
+
1
,
self
.
nparam
):
a
.
append
(
params
[
i
]
*
params
[
j
])
return
a
def
calc_pmatrix
(
self
):
"""
Loop over all sets of parameters used for generating the MC. For each
set take calc_pmatrixrow and put it as a row into self.pmatrix. This
matrix will then be used as
"""
pmatrix
=
[]
for
set
in
range
(
self
.
nsets
):
a
=
self
.
calc_pmatrixrow
(
mc
.
params
[
set
,:])
pmatrix
.
append
(
a
)
self
.
pmatrix
=
array
(
pmatrix
,
Float
)
#print self.pmatrix
def
calc_coeff
(
self
):
"""
Solve pmatrix*coeff=b for each bin
coeff is the vector of coefficients (A_0, B_i, C_i, D_ij)
b is the MC response for the bin
"""
self
.
coeff
=
[]
for
bin
in
range
(
len
(
mc
.
data
[
0
,:])):
b
=
array
(
mc
.
data
[:,
bin
],
Float
)
#print b
(
u
,
v
,
s
)
=
SV_decomp
(
self
.
pmatrix
)
coeff
=
SV_solve
(
u
,
v
,
s
,
b
)
self
.
coeff
.
append
(
coeff
)
self
.
coeff
=
array
(
self
.
coeff
,
Float
)
def
get_prediction
(
self
,
bin
,
params
):
"""
get_prediction() uses the coefficients A_0, B_i, C_i and D_ij to
predict the MC response for any parameter setting.
"""
return
sum
(
self
.
calc_pmatrixrow
(
params
)
*
self
.
coeff
[
bin
])
class
Tune
:
def
__init__
(
self
,
data
,
prediction
):
"""
The class "Tune" takes data and the MC prediction and
optimizes the parameters to fit the data the best possible way.
"""
print
"Starting the optimization"
def
calc_chi2
(
self
,
params
):
"""
calc_chi2() calculates the chi^2 of the Monte Carlo
prediction wrt the data. This is minimized in optimize().
"""
# FIXME: We ignore the errors !!!!!!
sigma
=
1.
chi2
=
0.
for
bin
in
range
(
len
(
data
.
data
[
0
,:])):
Xmc
=
prediction
.
get_prediction
(
bin
,
params
)
Xdata
=
data
.
data
[
0
,
bin
]
chi2
+=
(
Xmc
-
Xdata
)
**
2
return
chi2
def
optimize
(
self
):
"""
optimize() minimizes the chi^2 to find the parameter set
that describes the data best. The used function fmin() is
part of the SciPy packages and implements a simplex algorithm.
As start value x0 for the fit we use the center of the
tuning interval.
"""
x0
=
[]
for
i
in
range
(
len
(
mc
.
params
[
0
,:])):
x0
.
append
(
1.
*
sum
(
mc
.
params
[:,
i
])
/
len
(
mc
.
params
[:,
i
]))
xopt
=
fmin
(
self
.
calc_chi2
,
x0
)
print
xopt
mc
=
MC
()
data
=
Data
()
#mc.output()
#data.output()
prediction
=
Prediction
(
data
,
mc
)
tune
=
Tune
(
data
,
prediction
)
tune
.
optimize
()
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Thu, Apr 24, 6:34 AM (1 d, 12 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
4887753
Default Alt Text
hhprofessor.py (6 KB)
Attached To
rPROFESSORSVN professorsvn
Event Timeline
Log In to Comment