lorpe_1d_weighted.py
No OneTemporary
Actions

Size

5 KB

Referenced Files

None

Subscribers

None

lorpe_1d_weighted.py
View Options

	#!/usr/bin/env python3
	"""
	This example illustrates how to perform LOrPE on weighted unbinned samples.
	The cross-validation is not yet implemented for this type of density
	estimation, so this program simply shows how to perform LOrPE with
	a bandwidth chosen by the plug-in method.
	"""

	__author__="Igor Volobouev (i.volobouev@ttu.edu)"
	__version__="1.0"
	__date__ ="June 13 2022"

	# Perform necessary imports
	import npstat as ns
	import numpy as np
	from npstat_utils import HistoND
	from npstat_density_estimation import histoPluginBandwidth1D
	import matplotlib.pyplot as plt

	# Some parameters for the script
	sampleSize = 500 # Number of points to generate (unweighted sample size)
	xmin = 0.0 # Left edge of the sample histogram
	xmax = 1.0 # Right edge of the sample histogram
	numBins = 50 # Number of bins for the sample histogram
	symbetaPower = 4 # Power parameter of the symmetric beta weight function
	# w.r.t. which the orthogonal polynomial system will
	# be generated. Use -1 to employ a Gaussian instead.
	filterDegree = 1.5 # Polynomial degree for the local expansion. This
	# degree is just a guess here. Must be non-negative
	# and does not have to be an integer. Polynomial degree
	# of 0 corresponds to KDE with boundary kernels.
	nDiscrete = 1000 # Number of intervals for displaying the results.
	normalize = True # Do we want to normalize the density estimate?
	# Here, this only ensures that the integral is 1
	# and does not check for negative densities.
	nIntegSplits = 50 # Number of intervals to use for various quadratures.
	nIntegPoints = 4 # Number of Gauss-Legendre integration points to use
	# on each interval for various quadratures.

	# Here, we will use a uniform distribution on [xmin, xmax]
	# in order to generate the original, unweighted random sample
	d1 = ns.Uniform1D(xmin, xmax - xmin)

	# Generate a random sample according to this density
	rng = ns.MersenneTwister()
	sample = d1.generate(rng, sampleSize)

	# Assign a weight to each sample point. For the weight, we will
	# use a mixture of a uniform and a Gaussian distribution.
	truncatedGauss = ns.TruncatedDistribution1D(ns.Gauss1D(0.2, 0.1), xmin, xmax)
	weightDistro = ns.DistributionMix1D()
	weightDistro.add(truncatedGauss, 0.7)
	weightDistro.add(d1, 0.3)
	weights = [weightDistro.density(x) for x in sample]

	# Create an object which combines the coordinates and the weights
	weightedSample = list(zip(sample, weights))

	# Histogram for displaying the weighted sample. Note that
	# the type of bins is double rather than integer.
	h1 = HistoND("double", ns.HistoAxis(numBins, xmin, xmax, "X"),
	"Sample Histogram", "Weight")
	h1.fill(weightedSample)

	# Choose boundary handling method. Only two methods are currently
	# supported for unbinned LOrPE:
	#
	# "BM_TRUNCATE" -- Just truncate the weight function at the boundary.
	#
	# "BM_FOLD" -- Reflect (fold) the weight function at the boundary and
	# add to the non-reflected part.
	#
	# It is not obvious a priori which boundary handling method will perform
	# best for any particular case, so it might be useful to try different ones.
	bh = ns.BoundaryHandling("BM_TRUNCATE")

	# Create the object that will be used to estimate the density
	lorpeKernel = ns.LOrPE1DSymbetaKernel(symbetaPower, filterDegree, xmin, xmax, bh)
	lorpeDensityEstimator = ns.DoubleDoublePairLOrPE1D(lorpeKernel, weightedSample)

	# Select a reasonable bandwidth
	acc = ns.DoubleWeightedSampleAccumulator()
	acc.accumulate(weightedSample)
	robustScale = acc.sigmaRange()
	wsum = acc.sumOfWeights()
	wsumsq = acc.sumOfSquaredWeights()
	effSampleSize = wsum*wsum/wsumsq
	print("Kish's effective sample size is {:.5g}".format(effSampleSize))
	bw = ns.approxAmisePluginBwGauss(filterDegree, effSampleSize, robustScale)
	if symbetaPower >= 0:
	bw *= ns.approxSymbetaBandwidthRatio(symbetaPower, filterDegree)

	# Normalize the estimate
	if normalize:
	integral = lorpeDensityEstimator.densityIntegral(
	nIntegSplits, nIntegPoints, bw)
	print("Density estimate integral before normalization is", integral)
	currentNorm = lorpeDensityEstimator.normFactor()
	lorpeDensityEstimator.setNormFactor(currentNorm/integral)

	# Grid for displaying the results
	coords = np.linspace(xmin, xmax, nDiscrete+1)

	# Density estimate by LOrPE
	yunbinned = np.array([lorpeDensityEstimator.density(x, bw) for x in coords])

	# Determine the ISE
	ise = lorpeDensityEstimator.integratedSquaredError(
	weightDistro, nIntegSplits, nIntegPoints, bw)

	print("LOrPE filter degree is {:.1f}, bandwidth is {:.4f}, ISE = {:.4g}".format(
	filterDegree, bw, ise))

	# Plot the sample histogram
	xh, yh = ns.histoOutline1D(h1)
	plt.plot(xh, yh, 'k')

	# Overlay the original density. Scale all densities so that
	# their plots are compatible with the histogram height.
	area = h1.integral()
	xdens, ydens = ns.scanDensity1D(weightDistro, xmin, xmax, nDiscrete+1)
	plt.plot(xdens, ydens*area, 'g', label='True density')

	# Overlay the unbinned LOrPE result
	plt.plot(coords, yunbinned*area, 'r', label='LOrPE estimate')

	# Add labels and comments
	plt.xlabel('X')
	plt.ylabel('Sample weight / Density estimate')
	plt.title('Weighted Unbinned Density Estimation by LOrPE')
	plt.legend()

	# Display the window
	plt.show()

File Metadata

Mime Type: text/x-python
Expires: Tue, Sep 30, 4:45 AM (16 h, 4 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 6564812
Default Alt Text: lorpe_1d_weighted.py (5 KB)

lorpe_1d_weighted.pyNo OneTemporaryActions

lorpe_1d_weighted.pyView Options

File Metadata

Event Timeline

lorpe_1d_weighted.py
No OneTemporary
Actions

lorpe_1d_weighted.py
View Options