Page Menu
Home
HEPForge
Search
Configure Global Search
Log In
Files
F19244796
lorpe_1d_weighted.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Size
5 KB
Referenced Files
None
Subscribers
None
lorpe_1d_weighted.py
View Options
#!/usr/bin/env python3
"""
This example illustrates how to perform LOrPE on weighted unbinned samples.
The cross-validation is not yet implemented for this type of density
estimation, so this program simply shows how to perform LOrPE with
a bandwidth chosen by the plug-in method.
"""
__author__
=
"Igor Volobouev (i.volobouev@ttu.edu)"
__version__
=
"1.0"
__date__
=
"June 13 2022"
# Perform necessary imports
import
npstat
as
ns
import
numpy
as
np
from
npstat_utils
import
HistoND
from
npstat_density_estimation
import
histoPluginBandwidth1D
import
matplotlib.pyplot
as
plt
# Some parameters for the script
sampleSize
=
500
# Number of points to generate (unweighted sample size)
xmin
=
0.0
# Left edge of the sample histogram
xmax
=
1.0
# Right edge of the sample histogram
numBins
=
50
# Number of bins for the sample histogram
symbetaPower
=
4
# Power parameter of the symmetric beta weight function
# w.r.t. which the orthogonal polynomial system will
# be generated. Use -1 to employ a Gaussian instead.
filterDegree
=
1.5
# Polynomial degree for the local expansion. This
# degree is just a guess here. Must be non-negative
# and does not have to be an integer. Polynomial degree
# of 0 corresponds to KDE with boundary kernels.
nDiscrete
=
1000
# Number of intervals for displaying the results.
normalize
=
True
# Do we want to normalize the density estimate?
# Here, this only ensures that the integral is 1
# and does not check for negative densities.
nIntegSplits
=
50
# Number of intervals to use for various quadratures.
nIntegPoints
=
4
# Number of Gauss-Legendre integration points to use
# on each interval for various quadratures.
# Here, we will use a uniform distribution on [xmin, xmax]
# in order to generate the original, unweighted random sample
d1
=
ns
.
Uniform1D
(
xmin
,
xmax
-
xmin
)
# Generate a random sample according to this density
rng
=
ns
.
MersenneTwister
()
sample
=
d1
.
generate
(
rng
,
sampleSize
)
# Assign a weight to each sample point. For the weight, we will
# use a mixture of a uniform and a Gaussian distribution.
truncatedGauss
=
ns
.
TruncatedDistribution1D
(
ns
.
Gauss1D
(
0.2
,
0.1
),
xmin
,
xmax
)
weightDistro
=
ns
.
DistributionMix1D
()
weightDistro
.
add
(
truncatedGauss
,
0.7
)
weightDistro
.
add
(
d1
,
0.3
)
weights
=
[
weightDistro
.
density
(
x
)
for
x
in
sample
]
# Create an object which combines the coordinates and the weights
weightedSample
=
list
(
zip
(
sample
,
weights
))
# Histogram for displaying the weighted sample. Note that
# the type of bins is double rather than integer.
h1
=
HistoND
(
"double"
,
ns
.
HistoAxis
(
numBins
,
xmin
,
xmax
,
"X"
),
"Sample Histogram"
,
"Weight"
)
h1
.
fill
(
weightedSample
)
# Choose boundary handling method. Only two methods are currently
# supported for unbinned LOrPE:
#
# "BM_TRUNCATE" -- Just truncate the weight function at the boundary.
#
# "BM_FOLD" -- Reflect (fold) the weight function at the boundary and
# add to the non-reflected part.
#
# It is not obvious a priori which boundary handling method will perform
# best for any particular case, so it might be useful to try different ones.
bh
=
ns
.
BoundaryHandling
(
"BM_TRUNCATE"
)
# Create the object that will be used to estimate the density
lorpeKernel
=
ns
.
LOrPE1DSymbetaKernel
(
symbetaPower
,
filterDegree
,
xmin
,
xmax
,
bh
)
lorpeDensityEstimator
=
ns
.
DoubleDoublePairLOrPE1D
(
lorpeKernel
,
weightedSample
)
# Select a reasonable bandwidth
acc
=
ns
.
DoubleWeightedSampleAccumulator
()
acc
.
accumulate
(
weightedSample
)
robustScale
=
acc
.
sigmaRange
()
wsum
=
acc
.
sumOfWeights
()
wsumsq
=
acc
.
sumOfSquaredWeights
()
effSampleSize
=
wsum
*
wsum
/
wsumsq
print
(
"Kish's effective sample size is {:.5g}"
.
format
(
effSampleSize
))
bw
=
ns
.
approxAmisePluginBwGauss
(
filterDegree
,
effSampleSize
,
robustScale
)
if
symbetaPower
>=
0
:
bw
*=
ns
.
approxSymbetaBandwidthRatio
(
symbetaPower
,
filterDegree
)
# Normalize the estimate
if
normalize
:
integral
=
lorpeDensityEstimator
.
densityIntegral
(
nIntegSplits
,
nIntegPoints
,
bw
)
print
(
"Density estimate integral before normalization is"
,
integral
)
currentNorm
=
lorpeDensityEstimator
.
normFactor
()
lorpeDensityEstimator
.
setNormFactor
(
currentNorm
/
integral
)
# Grid for displaying the results
coords
=
np
.
linspace
(
xmin
,
xmax
,
nDiscrete
+
1
)
# Density estimate by LOrPE
yunbinned
=
np
.
array
([
lorpeDensityEstimator
.
density
(
x
,
bw
)
for
x
in
coords
])
# Determine the ISE
ise
=
lorpeDensityEstimator
.
integratedSquaredError
(
weightDistro
,
nIntegSplits
,
nIntegPoints
,
bw
)
print
(
"LOrPE filter degree is {:.1f}, bandwidth is {:.4f}, ISE = {:.4g}"
.
format
(
filterDegree
,
bw
,
ise
))
# Plot the sample histogram
xh
,
yh
=
ns
.
histoOutline1D
(
h1
)
plt
.
plot
(
xh
,
yh
,
'k'
)
# Overlay the original density. Scale all densities so that
# their plots are compatible with the histogram height.
area
=
h1
.
integral
()
xdens
,
ydens
=
ns
.
scanDensity1D
(
weightDistro
,
xmin
,
xmax
,
nDiscrete
+
1
)
plt
.
plot
(
xdens
,
ydens
*
area
,
'g'
,
label
=
'True density'
)
# Overlay the unbinned LOrPE result
plt
.
plot
(
coords
,
yunbinned
*
area
,
'r'
,
label
=
'LOrPE estimate'
)
# Add labels and comments
plt
.
xlabel
(
'X'
)
plt
.
ylabel
(
'Sample weight / Density estimate'
)
plt
.
title
(
'Weighted Unbinned Density Estimation by LOrPE'
)
plt
.
legend
()
# Display the window
plt
.
show
()
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Tue, Sep 30, 4:45 AM (16 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
6564812
Default Alt Text
lorpe_1d_weighted.py (5 KB)
Attached To
Mode
rNPSTATSVN npstatsvn
Attached
Detach File
Event Timeline
Log In to Comment