Page MenuHomeHEPForge

lorpe_1d_weighted.py
No OneTemporary

Size
5 KB
Referenced Files
None
Subscribers
None

lorpe_1d_weighted.py

#!/usr/bin/env python3
"""
This example illustrates how to perform LOrPE on weighted unbinned samples.
The cross-validation is not yet implemented for this type of density
estimation, so this program simply shows how to perform LOrPE with
a bandwidth chosen by the plug-in method.
"""
__author__="Igor Volobouev (i.volobouev@ttu.edu)"
__version__="1.0"
__date__ ="June 13 2022"
# Perform necessary imports
import npstat as ns
import numpy as np
from npstat_utils import HistoND
from npstat_density_estimation import histoPluginBandwidth1D
import matplotlib.pyplot as plt
# Some parameters for the script
sampleSize = 500 # Number of points to generate (unweighted sample size)
xmin = 0.0 # Left edge of the sample histogram
xmax = 1.0 # Right edge of the sample histogram
numBins = 50 # Number of bins for the sample histogram
symbetaPower = 4 # Power parameter of the symmetric beta weight function
# w.r.t. which the orthogonal polynomial system will
# be generated. Use -1 to employ a Gaussian instead.
filterDegree = 1.5 # Polynomial degree for the local expansion. This
# degree is just a guess here. Must be non-negative
# and does not have to be an integer. Polynomial degree
# of 0 corresponds to KDE with boundary kernels.
nDiscrete = 1000 # Number of intervals for displaying the results.
normalize = True # Do we want to normalize the density estimate?
# Here, this only ensures that the integral is 1
# and does not check for negative densities.
nIntegSplits = 50 # Number of intervals to use for various quadratures.
nIntegPoints = 4 # Number of Gauss-Legendre integration points to use
# on each interval for various quadratures.
# Here, we will use a uniform distribution on [xmin, xmax]
# in order to generate the original, unweighted random sample
d1 = ns.Uniform1D(xmin, xmax - xmin)
# Generate a random sample according to this density
rng = ns.MersenneTwister()
sample = d1.generate(rng, sampleSize)
# Assign a weight to each sample point. For the weight, we will
# use a mixture of a uniform and a Gaussian distribution.
truncatedGauss = ns.TruncatedDistribution1D(ns.Gauss1D(0.2, 0.1), xmin, xmax)
weightDistro = ns.DistributionMix1D()
weightDistro.add(truncatedGauss, 0.7)
weightDistro.add(d1, 0.3)
weights = [weightDistro.density(x) for x in sample]
# Create an object which combines the coordinates and the weights
weightedSample = list(zip(sample, weights))
# Histogram for displaying the weighted sample. Note that
# the type of bins is double rather than integer.
h1 = HistoND("double", ns.HistoAxis(numBins, xmin, xmax, "X"),
"Sample Histogram", "Weight")
h1.fill(weightedSample)
# Choose boundary handling method. Only two methods are currently
# supported for unbinned LOrPE:
#
# "BM_TRUNCATE" -- Just truncate the weight function at the boundary.
#
# "BM_FOLD" -- Reflect (fold) the weight function at the boundary and
# add to the non-reflected part.
#
# It is not obvious a priori which boundary handling method will perform
# best for any particular case, so it might be useful to try different ones.
bh = ns.BoundaryHandling("BM_TRUNCATE")
# Create the object that will be used to estimate the density
lorpeKernel = ns.LOrPE1DSymbetaKernel(symbetaPower, filterDegree, xmin, xmax, bh)
lorpeDensityEstimator = ns.DoubleDoublePairLOrPE1D(lorpeKernel, weightedSample)
# Select a reasonable bandwidth
acc = ns.DoubleWeightedSampleAccumulator()
acc.accumulate(weightedSample)
robustScale = acc.sigmaRange()
wsum = acc.sumOfWeights()
wsumsq = acc.sumOfSquaredWeights()
effSampleSize = wsum*wsum/wsumsq
print("Kish's effective sample size is {:.5g}".format(effSampleSize))
bw = ns.approxAmisePluginBwGauss(filterDegree, effSampleSize, robustScale)
if symbetaPower >= 0:
bw *= ns.approxSymbetaBandwidthRatio(symbetaPower, filterDegree)
# Normalize the estimate
if normalize:
integral = lorpeDensityEstimator.densityIntegral(
nIntegSplits, nIntegPoints, bw)
print("Density estimate integral before normalization is", integral)
currentNorm = lorpeDensityEstimator.normFactor()
lorpeDensityEstimator.setNormFactor(currentNorm/integral)
# Grid for displaying the results
coords = np.linspace(xmin, xmax, nDiscrete+1)
# Density estimate by LOrPE
yunbinned = np.array([lorpeDensityEstimator.density(x, bw) for x in coords])
# Determine the ISE
ise = lorpeDensityEstimator.integratedSquaredError(
weightDistro, nIntegSplits, nIntegPoints, bw)
print("LOrPE filter degree is {:.1f}, bandwidth is {:.4f}, ISE = {:.4g}".format(
filterDegree, bw, ise))
# Plot the sample histogram
xh, yh = ns.histoOutline1D(h1)
plt.plot(xh, yh, 'k')
# Overlay the original density. Scale all densities so that
# their plots are compatible with the histogram height.
area = h1.integral()
xdens, ydens = ns.scanDensity1D(weightDistro, xmin, xmax, nDiscrete+1)
plt.plot(xdens, ydens*area, 'g', label='True density')
# Overlay the unbinned LOrPE result
plt.plot(coords, yunbinned*area, 'r', label='LOrPE estimate')
# Add labels and comments
plt.xlabel('X')
plt.ylabel('Sample weight / Density estimate')
plt.title('Weighted Unbinned Density Estimation by LOrPE')
plt.legend()
# Display the window
plt.show()

File Metadata

Mime Type
text/x-python
Expires
Tue, Sep 30, 4:45 AM (1 d, 1 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
6564812
Default Alt Text
lorpe_1d_weighted.py (5 KB)

Event Timeline