Page Menu
Home
HEPForge
Search
Configure Global Search
Log In
Files
F11221893
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
33 KB
Subscribers
None
View Options
Index: trunk/npstat/swig/npstat_density_estimation_1d.py
===================================================================
--- trunk/npstat/swig/npstat_density_estimation_1d.py (revision 628)
+++ trunk/npstat/swig/npstat_density_estimation_1d.py (revision 629)
@@ -1,458 +1,468 @@
"""
This module provides a number of useful wrapper classes and functions
related to 1-d density estimation
"""
__author__ = "Igor Volobouev (i.volobouev@ttu.edu)"
__version__ = "1.1"
__date__ = "Sep 20 2019"
import npstat
import numpy as np
from npstat_utils import histoBinType
def histoPluginBandwidth1D(h, sampleSize, deg, symbetaPower):
if h.dim() != 1:
raise ValueError("Expected 1-d histogram as an argument")
assert deg >= 0.0, "Filter degree can not be negative"
if sampleSize <= 0.0:
sampleSize = h.nFillsInRange()
qvalues = (0.15865525393145705, 0.84134474606854295)
quantiles = npstat.histoQuantiles(h, 0, qvalues)
sigmaEstimate = (quantiles[1] - quantiles[0])/2.0
pluginBw = npstat.approxAmisePluginBwGauss(deg, sampleSize, sigmaEstimate)
if symbetaPower >= 0.0:
pluginBw *= npstat.approxSymbetaBandwidthRatio(symbetaPower, deg);
return pluginBw
def BandwidthCVLeastSquares1D(typename, *args):
"""
Least squares cross-validation helper class
"""
funcs = {'unsigned char' : npstat.UCharBandwidthCVLeastSquares1D,
'int' : npstat.IntBandwidthCVLeastSquares1D,
'long long' : npstat.LLongBandwidthCVLeastSquares1D,
'float' : npstat.FloatBandwidthCVLeastSquares1D,
'double' : npstat.DoubleBandwidthCVLeastSquares1D}
return funcs[typename](*args)
def BandwidthCVPseudoLogli1D(typename, *args):
"""
Pseudo-likelihood cross-validation helper class
"""
funcs = {'unsigned char' : npstat.UCharBandwidthCVPseudoLogli1D,
'int' : npstat.IntBandwidthCVPseudoLogli1D,
'long long' : npstat.LLongBandwidthCVPseudoLogli1D,
'float' : npstat.FloatBandwidthCVPseudoLogli1D,
'double' : npstat.DoubleBandwidthCVPseudoLogli1D}
return funcs[typename](*args)
class _BinnedCVCalc1D:
def __init__(self, effectiveSampleSize, weightedSample):
self._effectiveSampleSize = effectiveSampleSize
self._weightedSample = weightedSample
def __call__(self, h, result, filter):
calc = self._makeCalc(h)
if self._weightedSample is not None:
return calc.cvWeightedSample(h, self._weightedSample, result, filter)
elif self._effectiveSampleSize is not None:
return calc.cvWeighted(h, self._effectiveSampleSize, result, filter)
else:
return calc.cv(h, result, filter)
def _makeCalc(self, h):
raise NotImplementedError
class BinnedRLCVCalc1D(_BinnedCVCalc1D):
def __init__(self, regParameter, effectiveSampleSize=None, weightedSample=None):
assert regParameter >= 0.0, "RLCV regularisation parameter can not be negative"
_BinnedCVCalc1D.__init__(self, effectiveSampleSize, weightedSample)
self._regParameter = regParameter
def _makeCalc(self, h):
return BandwidthCVPseudoLogli1D(histoBinType(h), self._regParameter)
class BinnedLSCVCalc1D(_BinnedCVCalc1D):
def __init__(self, effectiveSampleSize=None, weightedSample=None):
_BinnedCVCalc1D.__init__(self, effectiveSampleSize, weightedSample)
def _makeCalc(self, h):
return BandwidthCVLeastSquares1D(histoBinType(h))
class _DiscreteDEBase1D:
"""
Base class for 1-d density estimation on regular grids
"""
def __init__(self, prototypeHisto, i_allowNegativeDensities):
if prototypeHisto.dim() != 1:
raise ValueError("Expected 1-d histogram as a prototype")
if not prototypeHisto.isUniformlyBinned():
raise ValueError("Expected uniformy binned histogram as a prototype")
self._allowNegative = i_allowNegativeDensities
axis = prototypeHisto.axis(0)
self._nBins = axis.nBins()
self._binWidth = axis.binWidth(0)
def expectedNBins(self):
return self._nBins
def expectedBinWidth(self):
return self._binWidth
def allowNegativeDensities(self, b):
self._allowNegative = b
def areNegativeDensitiesAllowed(self):
return self._allowNegative
def isCompatible(self, h):
if h.dim() != 1:
return False
if not h.isUniformlyBinned():
return False
axis = h.axis(0)
return self._nBins == axis.nBins() and self._binWidth == axis.binWidth(0)
def validateData(self, h):
if not self.isCompatible(h):
raise ValueError("Incompatible input histogram")
def setData(self, h):
raise NotImplementedError
def isDataSet(self):
raise NotImplementedError
def estimateDataDensity(self, filterDegree, bandwidth=-1.0,
symbetaPower=-1, rememberFilter=True):
raise NotImplementedError
def estimateDensity(self, h, filterDegree, bandwidth=-1.0,
symbetaPower=-1, rememberFilter=True):
assert filterDegree >= 0.0, "Filter degree can not be negative"
self.setData(h)
return self.estimateDataDensity(filterDegree, bandwidth,
symbetaPower, rememberFilter)
def addFilter(self, filterDegree, bandwidth=-1.0, symbetaPower=-1):
raise NotImplementedError
def nFiltersAdded(self):
raise NotImplementedError
def clearFilters(self):
raise NotImplementedError
def bestFilterByCV(self, h, cvCalc):
raise NotImplementedError
class FFTKDE1D(_DiscreteDEBase1D):
"""
Fast 1-d KDE on regular grids utilizing FFT for convolutions
"""
def __init__(self, prototypeHisto, allowNegativeResults=True,
useMirroring=True):
_DiscreteDEBase1D.__init__(self, prototypeHisto, allowNegativeResults)
self._mirror = useMirroring
self._engine = npstat.ConvolutionEngine1D(2*self._nBins)
self._kernelTable = dict()
self._kernelProperties = list()
self._directId = 2147483647
self._directKernelKey = (0.0, -1.0, 0.0)
# Add more constructor parameters if you want to
# change the settings used below
self._bh = npstat.BoundaryHandling("BM_TRUNCATE")
self._gaussKernelNSigmas = 12.0
def _makeKernel(self, filterDegree, bandwidth, symbetaPower):
assert filterDegree >= 0.0, "Filter degree can not be negative"
assert bandwidth > 0.0, "Kernel bandwidth must be positive"
if symbetaPower < 0.0:
distro = npstat.TruncatedGauss1D(0.0, bandwidth, self._gaussKernelNSigmas)
else:
distro = npstat.SymmetricBeta1D(0.0, bandwidth, symbetaPower)
builder = npstat.getBoundaryFilter1DBuilder(self._bh, distro, self._binWidth)
taper = npstat.contDegreeTaper(filterDegree)
kernelCenter = self._nBins - 1
pf = builder.makeFilter(taper, len(taper)-1, kernelCenter, 2*self._nBins)
- selfContrib = pf.valueAt(pf.peakPosition())
- return (npstat.polyFilter1DToNumpy(pf, kernelCenter, 2*self._nBins), selfContrib)
+ selfContribution = pf.valueAt(pf.peakPosition())
+ return (npstat.polyFilter1DToNumpy(pf, kernelCenter, 2*self._nBins),
+ selfContribution)
def addFilter(self, filterDegree, bandwidth, symbetaPower):
"""
Add a kernel for future use in cross-validation
"""
assert filterDegree >= 0.0, "Filter degree can not be negative"
key = (filterDegree, bandwidth, symbetaPower)
if key not in self._kernelTable:
id = len(self._kernelProperties)
assert id < self._directId, "Too many kernels"
k, sc = self._makeKernel(filterDegree, bandwidth, symbetaPower)
self._engine.depositFilter(id, k, self._nBins+1)
self._kernelTable[key] = id
self._kernelProperties.append((filterDegree, bandwidth, symbetaPower, sc))
return self._kernelTable[key]
def nFiltersAdded(self):
sz = len(self._kernelProperties)
assert sz == self._engine.filterBankSize(), "Filter counting failure"
return sz
def clearFilters(self):
self._kernelTable = dict()
self._kernelProperties = list()
self._engine.clearFilterBank();
self._directKernelKey = (0.0, -1.0, 0.0)
def _getKernelId(self, filterDegree, bandwidth, symbetaPower, addKern):
assert filterDegree >= 0.0, "Filter degree can not be negative"
assert bandwidth > 0.0, "Kernel bandwidth must be positive"
if addKern:
return self.addFilter(filterDegree, bandwidth, symbetaPower)
key = (filterDegree, bandwidth, symbetaPower)
if key in self._kernelTable:
return self._kernelTable[key]
if key != self._directKernelKey:
k, sc = self._makeKernel(filterDegree, bandwidth, symbetaPower)
self._engine.depositFilter(self._directId, k, self._nBins+1)
self._directKernelKey = key
return self._directId
def setData(self, h):
self.validateData(h)
self._engine.setFilter(npstat.doubleHistoBins1D(h, True, self._mirror))
def isDataSet(self):
return self._engine.isFilterSet()
def _buildResult(self, id, makeNonNegative):
arr = self._engine.convolveFilterAndDeposit(id, makeNonNegative)
result = np.resize(arr, self._nBins)
# The result we have now is normalized to the histogram area
# in case of mirroring, and potentially has some leakage
# if there is no mirroring. Renormalize everything to 1.
area = np.sum(result)*self._binWidth
assert area > 0.0, "Result can not be normalized. "\
"Was the input histogram filled?"
result /= area
return result
def estimateDataDensity(self, filterDegree, bandwidth, symbetaPower, addKern=True):
assert self._engine.isFilterSet(), "Data is not set"
assert filterDegree >= 0.0, "Filter degree can not be negative"
id = self._getKernelId(filterDegree, bandwidth, symbetaPower, addKern)
makeNonNegative = (filterDegree == 0.0 or (not self._allowNegative))
return self._buildResult(id, makeNonNegative)
def bestFilterByCV(self, h, cvCalc):
"""
Choose the best kernel among kernels added previsously utilizing
cross-validation
"""
assert self.nFiltersAdded() > 0, "No kernels added"
self.setData(h)
bestCVValue = -1.7e308
bestId = -1
bestResult = None
for id, properties in enumerate(self._kernelProperties):
filterDegree, bandwidth, symbetaPower, sc = properties
makeNonNegative = (filterDegree == 0.0 or (not self._allowNegative))
result = self._buildResult(id, makeNonNegative)
dummyFilter = npstat.ContribConstant1D(sc, h.nBins())
cvValue = cvCalc(h, result, dummyFilter)
if cvValue > bestCVValue:
bestCVValue = cvValue
bestId = id
bestResult = result
assert bestId >= 0, "Kernel optimization scan failed"
filterDegree, bandwidth, symbetaPower, sc = self._kernelProperties[bestId]
key = (filterDegree, bandwidth, symbetaPower)
return (bestCVValue, key, bestResult)
class LOrPE1D(_DiscreteDEBase1D):
"""
Density estimation by LOrPE on regular 1-d grids
"""
def __init__(self, prototypeHisto, allowNegativeResults=True, bh = None):
_DiscreteDEBase1D.__init__(self, prototypeHisto, allowNegativeResults)
self._bh = bh
if bh is None:
self._bh = npstat.BoundaryHandling("BM_FOLD")
self._data = None
self._filterTable = dict()
def setData(self, h):
self.validateData(h)
self._data = npstat.doubleHistoBins1D(h)
def isDataSet(self):
return self._data is not None
def _makeFilter(self, filterDegree, bandwidth, symbetaPower):
assert filterDegree >= 0.0, "Filter degree can not be negative"
return npstat.symbetaLOrPEFilter1D(
symbetaPower, bandwidth, filterDegree,
self._nBins, self._binWidth, self._bh)
def estimateDataDensity(self, filterDegree, bandwidth,
symbetaPower, rememberFilter=True):
assert self.isDataSet(), "Data is not set"
assert filterDegree >= 0.0, "Filter degree can not be negative"
key = (filterDegree, bandwidth, symbetaPower)
if key in self._filterTable:
filt = self._filterTable[key]
elif rememberFilter:
filt = self.addFilter(filterDegree, bandwidth, symbetaPower)
else:
filt = self._makeFilter(filterDegree, bandwidth, symbetaPower)
makeNonNegative = (filterDegree == 0.0 or (not self._allowNegative))
result = filt.filter(self._data, makeNonNegative)
area = np.sum(result)*self._binWidth
assert area > 0.0, "Result can not be normalized. "\
"Was the input histogram filled?"
result /= area
return result
def addFilter(self, filterDegree, bandwidth, symbetaPower):
assert filterDegree >= 0.0, "Filter degree can not be negative"
key = (filterDegree, bandwidth, symbetaPower)
if key in self._filterTable:
filter = self._filterTable[key]
else:
filter = self._makeFilter(filterDegree, bandwidth, symbetaPower)
self._filterTable[key] = filter
return filter
def nFiltersAdded(self):
return len(self._filterTable)
def clearFilters(self):
self._filterTable = dict()
def bestFilterByCV(self, h, cvCalc):
assert self.nFiltersAdded() > 0, "No filters added"
self.setData(h)
bestCVValue = -1.7e308
bestKey = (0.0, -1.0, 0.0)
bestResult = None
buffer = np.empty((self._nBins,), dtype=np.double)
for key, filt in self._filterTable.items():
filterDegree, bandwidth, symbetaPower = key
makeNonNegative = (filterDegree == 0.0 or (not self._allowNegative))
filt.filterIntoBuffer(self._data, buffer, makeNonNegative)
area = np.sum(buffer)*self._binWidth
assert area > 0.0, "Result can not be normalized. "\
"Was the input histogram filled?"
buffer /= area
cvValue = cvCalc(h, buffer, filt)
if cvValue > bestCVValue:
bestCVValue = cvValue
bestKey = key
bestResult = np.copy(buffer)
assert bestResult is not None, "Filter optimization scan failed"
return (bestCVValue, bestKey, bestResult)
class DiscreteOSDE1D(_DiscreteDEBase1D):
"""
OSDE on regular 1-d grids
"""
- def __init__(self, prototypeHisto, allowNegativeResults=True, maxdeg=50):
+ def __init__(self, prototypeHisto, allowNegativeResults=True, maxdeg=50,
+ weightFunction=None):
_DiscreteDEBase1D.__init__(self, prototypeHisto, allowNegativeResults)
assert maxdeg >= 0, "Filter degree can not be negative"
self._data = None
self._filterTable = dict()
- self._maxdeg = max(self._nBins-1, maxdeg)
- self._poly = npstat.OrthoPoly1D(self._maxdeg, np.ones(self._maxdeg+1), self._binWidth)
+ self._maxdeg = min(self._nBins-1, maxdeg)
+ wF = weightFunction
+ if wF is None:
+ wF = np.ones(self._nBins)
+ else:
+ assert len(wF) == self._nBins, "Incompatible number of weights"
+ self._poly = npstat.OrthoPoly1D(self._maxdeg, wF, self._binWidth)
+
+ def maxDegree(self):
+ return self._maxdeg
def setData(self, h):
self.validateData(h)
self._data = np.empty((self._maxdeg+1,), dtype=np.double)
self._poly.calculateCoeffs(npstat.doubleHistoBins1D(h), self._data)
def isDataSet(self):
return self._data is not None
def _makeFilter(self, filterDegree):
assert filterDegree >= 0.0, "Filter degree can not be negative"
taper = npstat.contDegreeTaper(filterDegree)
if len(taper) > self._maxdeg+1:
raise ValueError("Filter degree is too large")
coeffs = np.array(taper, dtype=np.double)
response = np.empty((self._nBins,), dtype=np.double)
self._poly.globalFilterDiag(coeffs, response)
contrib = npstat.ContribVector1D(response)
return (coeffs, contrib)
def estimateDataDensity(self, filterDegree, rememberFilter=True):
assert self.isDataSet(), "Data is not set"
assert filterDegree >= 0.0, "Filter degree can not be negative"
key = filterDegree
if key in self._filterTable:
taper, contrib = self._filterTable[key]
elif rememberFilter:
taper, contrib = self.addFilter(filterDegree)
else:
taper, contrib = self._makeFilter(filterDegree)
coeffs = np.array([a*b for a,b in zip(taper,self._data)], dtype=np.double)
result = np.empty((self._nBins,), dtype=np.double)
self._poly.allSeries(coeffs, result, not self._allowNegative)
area = np.sum(result)*self._binWidth
assert area > 0.0, "Result can not be normalized. "\
"Was the input histogram filled?"
result /= area
return result
def estimateDensity(self, h, filterDegree, rememberFilter=True):
assert filterDegree >= 0.0, "Filter degree can not be negative"
self.setData(h)
return self.estimateDataDensity(filterDegree, rememberFilter)
def addFilter(self, filterDegree):
assert filterDegree >= 0.0, "Filter degree can not be negative"
key = filterDegree
if key in self._filterTable:
filter = self._filterTable[key]
else:
filter = self._makeFilter(filterDegree)
self._filterTable[key] = filter
return filter
def nFiltersAdded(self):
return len(self._filterTable)
def clearFilters(self):
self._filterTable = dict()
def bestFilterByCV(self, h, cvCalc):
assert self.nFiltersAdded() > 0, "No filters added"
self.setData(h)
bestCVValue = -1.7e308
bestKey = -1.0
bestResult = None
buffer = np.empty((self._nBins,), dtype=np.double)
for key, filt in self._filterTable.items():
taper, contrib = filt
coeffs = np.array([a*b for a,b in zip(taper,self._data)], dtype=np.double)
self._poly.allSeries(coeffs, buffer, not self._allowNegative)
area = np.sum(buffer)*self._binWidth
assert area > 0.0, "Result can not be normalized. "\
"Was the input histogram filled?"
buffer /= area
cvValue = cvCalc(h, buffer, contrib)
if cvValue > bestCVValue:
bestCVValue = cvValue
bestKey = key
bestResult = np.copy(buffer)
assert bestResult is not None, "Filter optimization scan failed"
return (bestCVValue, bestKey, bestResult)
Index: trunk/examples/Python/lorpe_1d_cv.py
===================================================================
--- trunk/examples/Python/lorpe_1d_cv.py (revision 628)
+++ trunk/examples/Python/lorpe_1d_cv.py (revision 629)
@@ -1,120 +1,119 @@
#!/usr/bin/env python3
"""
This example illistrates how to choose LOrPE filter settings
in 1-d by cross-validation
"""
__author__="Igor Volobouev (i.volobouev@ttu.edu)"
__version__="1.0"
__date__ ="Sep 19 2019"
# Perform necessary imports
import npstat as ns
from npstat_utils import HistoND
from npstat_density_estimation_1d import LOrPE1D, FFTKDE1D, \
histoPluginBandwidth1D, BinnedLSCVCalc1D, BinnedRLCVCalc1D
import matplotlib.pyplot as plt
import numpy as np
import math
# Some parameters for the script
sampleSize = 1000 # Number of points to generate (sample size)
xmin = 0.0 # Left edge of the sample histogram
xmax = 5.0 # Right edge of the sample histogram
numBins = 100 # Number of bins for the sample histogram
nDiscrete = 500 # Number of cells for sample discretization.
# Should be large enough so that the cell width
# is substantially smaller than any physical scale.
symbetaPower = 4 # Power parameter of the symmetric beta kernel.
# Specify -1 to use a Gaussian kernel instead.
pluginDegree = 2 # Degree of polynomial used to generate a simple
# plug-in estimate for comparison with cross-validation
# Here, we will use the exponential distribution truncated to [xmin, xmax]
# in order to generate the random sample
d1 = ns.TruncatedDistribution1D(ns.Exponential1D(0.0, 1.0), xmin, xmax)
# Generate a random sample according to this density
rng = ns.MersenneTwister()
sample = d1.generate(rng, sampleSize)
# Histogram for displaying the sample
h1 = HistoND("int", ns.HistoAxis(numBins, xmin, xmax, "X"),
"Sample Histogram", "Counts")
any(h1.fill(x, 1) for x in sample)
# Histogram for sample discretization
h2 = HistoND("int", ns.HistoAxis(nDiscrete, xmin, xmax, "X"),
"LOrPE Discretization Histogram", "Counts")
any(h2.fill(x, 1) for x in sample)
# Object which will perform LOrPE
smoother = LOrPE1D(h2)
# Add a simple density estimate with a plug-in bandwidth
bw0 = histoPluginBandwidth1D(h2, sampleSize, pluginDegree, symbetaPower)
print("Plug-in bandwidth is {:.4f} for degree {}".format(bw0, pluginDegree))
yPlugin = smoother.estimateDensity(h2, pluginDegree, bw0, symbetaPower)
# Add vanilla KDE for comparison (without mirroring and high order kernels)
kde = FFTKDE1D(h2, False, False)
bwkde = histoPluginBandwidth1D(h2, sampleSize, 0, symbetaPower)
yKDE = kde.estimateDensity(h2, 0, bwkde, symbetaPower)
print("KDE bandwidth is {:.4f}".format(bwkde))
# Add a number of filters with different filter degrees and bandwidth
# settings for cross-validation calculations
for filterDegree in range(1, 7):
bw0 = histoPluginBandwidth1D(h2, sampleSize, filterDegree, symbetaPower)
print("Building degree {} filters, plug-in bandwidth is {:.4f}".format(
filterDegree, bw0))
for bw in np.logspace(math.log10(bw0/4.0), math.log10(bw0*4.0), 50):
smoother.addFilter(filterDegree, bw, symbetaPower)
# Run the least squares cross-validation (LSCV)
lscvCalc = BinnedLSCVCalc1D()
bestLSCV, paramsLSCV, yLSCV = smoother.bestFilterByCV(h2, lscvCalc)
lscvDegree, lscvBandwidth, lscvSymbetaPower = paramsLSCV
print("LSCV: bandwidth is {:.4f}, degree is {}".format(lscvBandwidth, lscvDegree))
# Run the regularized likelihood cross-validation (RLCV)
regularisationParameter = 0.5
rlcvCalc = BinnedRLCVCalc1D(regularisationParameter)
bestRLCV, paramsRLCV, yRLCV = smoother.bestFilterByCV(h2, rlcvCalc)
rlcvDegree, rlcvBandwidth, rlcvSymbetaPower = paramsRLCV
print("RLCV: bandwidth is {:.4f}, degree is {}".format(rlcvBandwidth, rlcvDegree))
# Create Matplotlib plot window
fig = plt.figure()
ax = fig.add_subplot(111)
# Plot the sample histogram
xh, yh = ns.histoOutline1D(h1)
ax.plot(xh, yh, 'k')
# Overlay the original density. Scale all densities so that their
# plots are compatible with the histogram height.
verticalScale = h1.binVolume()*sampleSize
xdens, ydens = ns.scanDensity1D(d1, xmin, xmax, nDiscrete+1)
ax.plot(xdens, ydens*verticalScale, 'g', label='True density')
# Overlay the KDE
coords = h2.axis(0).binCenters()
ax.plot(coords, yKDE*verticalScale, 'm', label="KDE")
# Overlay the LOrPE results with various bandwidth values
-bandwidths = (bw0, rlcvBandwidth, lscvBandwidth)
colors = ('c', 'r', 'b' )
labels = ('Plug-in', 'RLCV', 'LSCV' )
results = (yPlugin, yRLCV, yLSCV )
-for bw, color, l, y in zip(bandwidths, colors, labels, results):
+for color, l, y in zip(colors, labels, results):
ax.plot(coords, y*verticalScale, color, label="{} estimate".format(l))
# Add labels and comments
plt.xlabel('X')
plt.ylabel('Density estimate')
plt.title('LOrPE with Cross-Validation')
plt.legend()
# Display the window
plt.show()
Index: trunk/examples/Python/discrete_osde_1d.py
===================================================================
--- trunk/examples/Python/discrete_osde_1d.py (revision 0)
+++ trunk/examples/Python/discrete_osde_1d.py (revision 629)
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+"""
+This example illistrates how to perform univariate orthogonal series
+density estimation (OSDE) on equdistant grids using NPStat
+"""
+
+__author__="Igor Volobouev (i.volobouev@ttu.edu)"
+__version__="1.0"
+__date__ ="Sep 20 2019"
+
+# Perform necessary imports
+import npstat as ns
+from npstat_utils import HistoND
+from npstat_density_estimation_1d import DiscreteOSDE1D, \
+ BinnedLSCVCalc1D, BinnedRLCVCalc1D
+import matplotlib.pyplot as plt
+import numpy as np
+
+# Some parameters for the script
+sampleSize = 500 # Number of points to generate (sample size)
+xmin = -3.5 # Left edge of the sample histogram
+xmax = 3.5 # Right edge of the sample histogram
+numBins = 70 # Number of bins for the sample histogram
+nDiscrete = 700 # Number of cells for sample discretization
+filterDegree = 10 # Initial guess for the OSDE degree
+
+# We will use a 1-d Gassian mixture to generate the random sample.
+# See the "brute_force_kde_1d.py" script for more comments on this.
+component1 = ns.GaussianMixtureEntry(3.0, -1.0, 0.5)
+component2 = ns.GaussianMixtureEntry(7.0, 1.0, 0.5)
+mixture = ns.GaussianMixture1D(0.0, 1.0, (component1, component2))
+
+# Generate a random sample according to this density
+rng = ns.MersenneTwister()
+sample = mixture.generate(rng, sampleSize)
+
+# Histogram for displaying the sample
+h1 = HistoND("int", ns.HistoAxis(numBins, xmin, xmax, "X"),
+ "Sample Histogram", "Counts")
+any(h1.fill(x, 1) for x in sample)
+
+# Histogram for sample discretization
+h2 = HistoND("int", ns.HistoAxis(nDiscrete, xmin, xmax, "X"),
+ "OSDE Discretization Histogram", "Counts")
+any(h2.fill(x, 1) for x in sample)
+
+# Object which will perform OSDE
+smoother = DiscreteOSDE1D(h2)
+
+# Initial guesstimate
+yFirst = smoother.estimateDensity(h2, filterDegree)
+print("Initial guess OSDE degree is ", filterDegree)
+
+# Add a number of filters with different degrees for cross-validation
+# calculations. Note that degrees do not have to be integer.
+for degree in np.linspace(2.0, 40.0, 77):
+ smoother.addFilter(degree)
+
+# Run the least squares cross-validation (LSCV)
+lscvCalc = BinnedLSCVCalc1D()
+bestLSCV, lscvFilterDegree, yLSCV = smoother.bestFilterByCV(h2, lscvCalc)
+print("LSCV OSDE degree is ", lscvFilterDegree)
+
+# Run the regularized likelihood cross-validation (RLCV)
+regularisationParameter = 0.5
+rlcvCalc = BinnedRLCVCalc1D(regularisationParameter)
+bestRLCV, rlcvFilterDegree, yRLCV = smoother.bestFilterByCV(h2, rlcvCalc)
+print("RLCV OSDE degree is ", rlcvFilterDegree)
+
+# We are done with density estimation at this point. The rest of the script
+# is just plotting results. To begin with, create a Matplotlib plot window.
+fig = plt.figure()
+ax = fig.add_subplot(111)
+
+# Plot the sample histogram
+xh, yh = ns.histoOutline1D(h1)
+ax.plot(xh, yh, 'k')
+
+# Overlay the original density. Scale all densities so that their
+# plots are compatible with the histogram height.
+verticalScale = h1.binVolume()*sampleSize
+xdens, ydens = ns.scanDensity1D(mixture, xmin, xmax, nDiscrete+1)
+ax.plot(xdens, ydens*verticalScale, 'g', label='True density')
+
+# Overlay the OSDE results with various degrees
+xcoords = h2.axis(0).binCenters()
+colors = ('c', 'r', 'b' )
+labels = ('Guess', 'RLCV', 'LSCV' )
+results = (yFirst, yRLCV, yLSCV )
+for color, l, y in zip(colors, labels, results):
+ ax.plot(xcoords, y*verticalScale, color, label="{} estimate".format(l))
+
+# Set the x range of the plot to that of the sample histogram
+ax.set_xlim(h1.axis(0).min(), h1.axis(0).max())
+
+# Add labels and comments
+plt.xlabel('X')
+plt.ylabel('Density estimate')
+plt.title('Legendre Basis OSDE')
+plt.legend(loc=2)
+
+# Display the window
+plt.show()
Property changes on: trunk/examples/Python/discrete_osde_1d.py
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: trunk/examples/Python/kde_1d_by_fft.py
===================================================================
--- trunk/examples/Python/kde_1d_by_fft.py (revision 628)
+++ trunk/examples/Python/kde_1d_by_fft.py (revision 629)
@@ -1,120 +1,119 @@
#!/usr/bin/env python3
"""
This example illistrates how to perform constant-bandwidth univariate
kernel density estimation (KDE) utilizing fast Fourier transforms (FFT)
with the help of the NPStat package.
"""
__author__="Igor Volobouev (i.volobouev@ttu.edu)"
__version__="1.0"
__date__ ="Sep 19 2019"
# Perform necessary imports
import npstat as ns
from npstat_utils import HistoND
from npstat_density_estimation_1d import FFTKDE1D, histoPluginBandwidth1D, \
BinnedLSCVCalc1D, BinnedRLCVCalc1D
import matplotlib.pyplot as plt
import numpy as np
import math
# Some parameters for the script
sampleSize = 500 # Number of points to generate (sample size)
xmin = -3.5 # Left edge of the sample histogram
xmax = 3.5 # Right edge of the sample histogram
numBins = 70 # Number of bins for the sample histogram
nDiscrete = 1024 # Number of cells for sample discretization.
# Should be a power of 2 (this results in the
# most efficient application of FFT).
symbetaPower = -1 # Power parameter of the symmetric beta kernel.
# Specify -1 to use a Gaussian kernel instead.
filterDegree = 2 # Degree of polynomial used to generate high-order kernels.
# In general, even numbers make the most sense here.
# Kernel order will then be filterDegree + 2. The
# kernel will be a bona-fide density (i.e., non-negative
# everywhere) only if filterDegree is 0.
# We will use a 1-d Gassian mixture to generate the random sample.
# See the "brute_force_kde_1d.py" script for more comments on this.
component1 = ns.GaussianMixtureEntry(3.0, -1.0, 0.5)
component2 = ns.GaussianMixtureEntry(7.0, 1.0, 0.5)
mixture = ns.GaussianMixture1D(0.0, 1.0, (component1, component2))
# Generate a random sample according to this density
rng = ns.MersenneTwister()
sample = mixture.generate(rng, sampleSize)
# Histogram for displaying the sample
h1 = HistoND("int", ns.HistoAxis(numBins, xmin, xmax, "X"),
"Sample Histogram", "Counts")
any(h1.fill(x, 1) for x in sample)
# Histogram for sample discretization. Note that the axis range
# is increased, to catch all possible tails. Such an increase may
# or may not make sense, depending on application.
h2 = HistoND("int", ns.HistoAxis(nDiscrete, 1.5*xmin, 1.5*xmax, "X"),
"KDE Discretization Histogram", "Counts")
any(h2.fill(x, 1) for x in sample)
# Object which will perform KDE
smoother = FFTKDE1D(h2)
# Make an initial guess for the KDE bandwidth. This guess will use
# a robust measure of the sample scale, estimated from the histogram.
bw0 = histoPluginBandwidth1D(h2, sampleSize, filterDegree, symbetaPower)
print("Plug-in bandwidth is {:.4f}".format(bw0))
yPlugin = smoother.estimateDensity(h2, filterDegree, bw0, symbetaPower)
# Add a number of kernels with different bandwidth values
# for cross-validation calculations
for bw in np.logspace(math.log10(bw0/4.0), math.log10(bw0*2.0), 100):
smoother.addFilter(filterDegree, bw, symbetaPower)
# Run the least squares cross-validation (LSCV)
lscvCalc = BinnedLSCVCalc1D()
bestLSCV, paramsLSCV, yLSCV = smoother.bestFilterByCV(h2, lscvCalc)
lscvFilterDegree, lscvBandwidth, lscvSymbetaPower = paramsLSCV
print("LSCV bandwidth is {:.4f}".format(lscvBandwidth))
# Run the regularized likelihood cross-validation (RLCV)
regularisationParameter = 0.5
rlcvCalc = BinnedRLCVCalc1D(regularisationParameter)
bestRLCV, paramsRLCV, yRLCV = smoother.bestFilterByCV(h2, rlcvCalc)
rlcvFilterDegree, rlcvBandwidth, rlcvSymbetaPower = paramsRLCV
print("RLCV bandwidth is {:.4f}".format(rlcvBandwidth))
# We are done with density estimation at this point. The rest of the script
# is just plotting results. To begin with, create a Matplotlib plot window.
fig = plt.figure()
ax = fig.add_subplot(111)
# Plot the sample histogram
xh, yh = ns.histoOutline1D(h1)
ax.plot(xh, yh, 'k')
# Overlay the original density. Scale all densities so that their
# plots are compatible with the histogram height.
verticalScale = h1.binVolume()*sampleSize
xdens, ydens = ns.scanDensity1D(mixture, xmin, xmax, nDiscrete+1)
ax.plot(xdens, ydens*verticalScale, 'g', label='True density')
# Overlay the KDE results with various bandwidth values
xkde = h2.axis(0).binCenters()
-bandwidths = (bw0, rlcvBandwidth, lscvBandwidth)
colors = ('c', 'r', 'b' )
labels = ('Plug-in', 'RLCV', 'LSCV' )
results = (yPlugin, yRLCV, yLSCV )
-for bw, color, l, y in zip(bandwidths, colors, labels, results):
+for color, l, y in zip(colors, labels, results):
ax.plot(xkde, y*verticalScale, color, label="{} estimate".format(l))
# Set the x range of the plot to that of the sample histogram
ax.set_xlim(h1.axis(0).min(), h1.axis(0).max())
# Add labels and comments
plt.xlabel('X')
plt.ylabel('Density estimate')
plt.title('KDE by FFT')
plt.legend(loc=2)
# Display the window
plt.show()
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Wed, May 14, 11:04 AM (21 h, 45 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
5111337
Default Alt Text
(33 KB)
Attached To
rNPSTATSVN npstatsvn
Event Timeline
Log In to Comment