Index: trunk/examples/Python/variable_bandwidth_kde_1d.py =================================================================== --- trunk/examples/Python/variable_bandwidth_kde_1d.py (revision 0) +++ trunk/examples/Python/variable_bandwidth_kde_1d.py (revision 617) @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +""" +This example illistrates how to perform variable-bandwidth univariate +kernel density estimation (KDE) with NPStat +""" + +__author__="Igor Volobouev (i.volobouev@ttu.edu)" +__version__="1.0" +__date__ ="Sep 17 2019" + +# Perform necessary imports +import npstat as ns +from npstat_utils import HistoND +import matplotlib.pyplot as plt + +# Some parameters for the script +sampleSize = 500 # Number of points to generate (sample size) +xmin = -3.5 # Left edge of the sample histogram +xmax = 3.5 # Right edge of the sample histogram +numBins = 70 # Number of bins for the sample histogram +nDiscrete = 1000 # Number of cells for sample discretization +symbetaPower = -1 # Power parameter of the symmetric beta kernel. + # Specify -1 to use a Gaussian kernel instead. +bwFactor = 0.5 # Pilot estimate bandwidth will be set to the product + # of the plug-in bandwidth and this factor. The choice + # of the factor is empirical here. + +# We will use a 1-d Gassian mixture to generate the random sample. +# See the "brute_force_kde_1d.py" script for more comments on this. +component1 = ns.GaussianMixtureEntry(3.0, -1.0, 0.5) +component2 = ns.GaussianMixtureEntry(7.0, 1.0, 0.5) +mixture = ns.GaussianMixture1D(0.0, 1.0, (component1, component2)) + +# Generate a random sample according to this density +rng = ns.MersenneTwister() +sample = mixture.generate(rng, sampleSize) + +# Histogram for displaying the sample +h1 = HistoND("int", ns.HistoAxis(numBins, xmin, xmax, "X"), + "Sample Histogram", "Counts") +any(h1.fill(x, 1) for x in sample) + +# Sample discretization for KDE. For smoothing purposes, the bin width +# should be substantially smaller than any physical scale of interest. +h2 = HistoND("int", ns.HistoAxis(nDiscrete, 2*xmin, 2*xmax, "X"), + "KDE Discretization Histogram", "Counts") +any(h2.fill(x, 1) for x in sample) + +# Perform variable-bandwidth KDE. "simpleVariableBandwidthSmooth1D" is the +# simplest function to use. It is appropriate when the histogrammed sample +# consists of unweighted points. Function "weightedVariableBandwidthSmooth1D" +# can be used if the points are weighted (it requires the user to specify +# the effective sample size). Function "variableBandwidthSmooth1D" is the +# most flexible one but it requires the user to come up with his/her own +# pilot bandwidth. Note that Python signatures of these functions are +# generated by the "npstat/wrap/variableBandwidthSmooth1DWrap.hh" header +# rather than "npstat/stat/variableBandwidthSmooth1D.hh". +ykde, bw = ns.simpleVariableBandwidthSmooth1D(h2, symbetaPower, bwFactor) +print("Pilot estimate bandwidth is", bw) + +# Create Matplotlib plot window +fig = plt.figure() +ax = fig.add_subplot(111) + +# Plot the sample histogram +xh, yh = ns.histoOutline1D(h1) +ax.plot(xh, yh, 'k') + +# Overlay the original density. Scale all densities so that their +# plots are compatible with the histogram height. +verticalScale = h1.binVolume()*sampleSize +xdens, ydens = ns.scanDensity1D(mixture, xmin, xmax, nDiscrete+1) +ax.plot(xdens, ydens*verticalScale, 'g', label='True density') + +# Overlay the LOrPE result. Scale it to the area of the histogram. +ax.plot(h2.axis(0).binCenters(), ykde*verticalScale, 'r', label='KDE') + +# Set the x range of the plot to that of the sample histogram +ax.set_xlim(h1.axis(0).min(), h1.axis(0).max()) + +# Add labels and comments +plt.xlabel('X') +plt.ylabel('Density') +plt.title('Variable-Bandwidth KDE') +plt.legend(loc=2) + +# Display the window +plt.show() Property changes on: trunk/examples/Python/variable_bandwidth_kde_1d.py ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/examples/Python/lorpe_1d.py =================================================================== --- trunk/examples/Python/lorpe_1d.py (revision 616) +++ trunk/examples/Python/lorpe_1d.py (revision 617) @@ -1,93 +1,93 @@ #!/usr/bin/env python3 """ This example illistrates how to perform univariate density estimation utilizing local orthogonal polynomial expansion (LOrPE). For details of the method, see the article by D.P. Amali Dassanayake, Igor Volobouev, and A. Alexandre Trindade, "Local orthogonal polynomial expansion for density estimation", Journal of Nonparametric Statistics, 29:4, 806-830 (2017). https://doi.org/10.1080/10485252.2017.1371715 (also arXiv:1505.00275). """ __author__="Igor Volobouev (i.volobouev@ttu.edu)" __version__="1.0" __date__ ="Sep 16 2019" # Perform necessary imports import npstat as ns from npstat_utils import HistoND import matplotlib.pyplot as plt # Some parameters for the script sampleSize = 10000 # Number of points to generate (sample size) xmin = 0.0 # Left edge of the sample histogram xmax = 5.0 # Right edge of the sample histogram numBins = 100 # Number of bins for the sample histogram nDiscrete = 1000 # Number of cells for sample discretization symbetaPower = -1 # Power parameter of the symmetric beta weight function # w.r.t. to which the orthogonal polynomial system # will be generated. Use -1 to use a Gaussian instead. filterDegree = 2 # Polynomial degree for the local expansion bwFactor = 1.0 # LOrPE bandwidth will be set to the product of the # plug-in bandwidth and this factor # Here, we will use the exponential distribution truncated to [xmin, xmax] # in order to generate the random sample d1 = ns.TruncatedDistribution1D(ns.Exponential1D(0.0, 1.0), xmin, xmax) # Generate a random sample according to this density rng = ns.MersenneTwister() sample = d1.generate(rng, sampleSize) # Histogram for displaying the sample h1 = HistoND("int", ns.HistoAxis(numBins, xmin, xmax, "X"), "Sample Histogram", "Counts") any(h1.fill(x, 1) for x in sample) # Sample discretization for LOrPE. For smoothing purposes, the bin width -# should be substatially smaller than any physical scale of interest. +# should be substantially smaller than any physical scale of interest. h2 = HistoND("int", ns.HistoAxis(nDiscrete, xmin, xmax, "X"), "LOrPE Discretization Histogram", "Counts") any(h2.fill(x, 1) for x in sample) # LOrPE boundary handling method. Simple methods (which require no additional # tuning parameters) are: # # "BM_TRUNCATE" -- Just truncate the weight function at the boundary, # as described in the article by Dassanayake et. al. # "BM_STRETCH" -- Stretch the weight function by a factor that smoothly # changes from 1 to 2 as the boundary is approached. # "BM_FOLD" -- Reflect (fold) the weight function at the boundary and # add to the non-reflected part. # "BM_CONSTSQ" -- Stretch the weight function near the boundary in such # a way that the integral of the weight function squared # inside the bounded interval is preserved. # "BM_FOLDSQ" -- Both fold the weight function at the boundary and # preserve the integral of the weight function squared. # "BM_CONSTVAR" -- Stretch the weight function near the boundary so that # the variance of a variable distributed acording to # this weight function remains constant. # "BM_FOLDVAR" -- Both fold the weight function at the boundary and # stretch so that the variance remains constant. # # It is not obvious apriori which boundary handling method will perform # best for any particular sample, so it might be useful to try several. bh = ns.BoundaryHandling("BM_FOLD") # Perform density estimation ylorpe, bw = ns.lorpeSmooth1D(h2, symbetaPower, filterDegree, bh, bwFactor) print("LOrPE bandwidth is", bw) # Plot the sample histogram xh, yh = ns.histoOutline1D(h1) plt.plot(xh, yh, 'k') # Overlay the LOrPE result. Scale it to the area of the histogram. plt.plot(h2.axis(0).binCenters(), ylorpe*h1.integral(), 'r') # Add labels and comments plt.xlabel('X') plt.ylabel('Density') plt.title('LOrPE Density Estimate') # Display the window plt.show()