No OneTemporary
Actions

Size

33 KB

Subscribers

None

View Options

	Index: trunk/npstat/swig/npstat_density_estimation_1d.py
	===================================================================
	--- trunk/npstat/swig/npstat_density_estimation_1d.py (revision 628)
	+++ trunk/npstat/swig/npstat_density_estimation_1d.py (revision 629)
	@@ -1,458 +1,468 @@
	"""
	This module provides a number of useful wrapper classes and functions
	related to 1-d density estimation
	"""

	__author__ = "Igor Volobouev (i.volobouev@ttu.edu)"
	__version__ = "1.1"
	__date__ = "Sep 20 2019"

	import npstat
	import numpy as np
	from npstat_utils import histoBinType


	def histoPluginBandwidth1D(h, sampleSize, deg, symbetaPower):
	if h.dim() != 1:
	raise ValueError("Expected 1-d histogram as an argument")
	assert deg >= 0.0, "Filter degree can not be negative"
	if sampleSize <= 0.0:
	sampleSize = h.nFillsInRange()
	qvalues = (0.15865525393145705, 0.84134474606854295)
	quantiles = npstat.histoQuantiles(h, 0, qvalues)
	sigmaEstimate = (quantiles[1] - quantiles[0])/2.0
	pluginBw = npstat.approxAmisePluginBwGauss(deg, sampleSize, sigmaEstimate)
	if symbetaPower >= 0.0:
	pluginBw *= npstat.approxSymbetaBandwidthRatio(symbetaPower, deg);
	return pluginBw


	def BandwidthCVLeastSquares1D(typename, *args):
	"""
	Least squares cross-validation helper class
	"""
	funcs = {'unsigned char' : npstat.UCharBandwidthCVLeastSquares1D,
	'int' : npstat.IntBandwidthCVLeastSquares1D,
	'long long' : npstat.LLongBandwidthCVLeastSquares1D,
	'float' : npstat.FloatBandwidthCVLeastSquares1D,
	'double' : npstat.DoubleBandwidthCVLeastSquares1D}
	return funcs[typename](*args)


	def BandwidthCVPseudoLogli1D(typename, *args):
	"""
	Pseudo-likelihood cross-validation helper class
	"""
	funcs = {'unsigned char' : npstat.UCharBandwidthCVPseudoLogli1D,
	'int' : npstat.IntBandwidthCVPseudoLogli1D,
	'long long' : npstat.LLongBandwidthCVPseudoLogli1D,
	'float' : npstat.FloatBandwidthCVPseudoLogli1D,
	'double' : npstat.DoubleBandwidthCVPseudoLogli1D}
	return funcs[typename](*args)


	class _BinnedCVCalc1D:
	def __init__(self, effectiveSampleSize, weightedSample):
	self._effectiveSampleSize = effectiveSampleSize
	self._weightedSample = weightedSample

	def __call__(self, h, result, filter):
	calc = self._makeCalc(h)
	if self._weightedSample is not None:
	return calc.cvWeightedSample(h, self._weightedSample, result, filter)
	elif self._effectiveSampleSize is not None:
	return calc.cvWeighted(h, self._effectiveSampleSize, result, filter)
	else:
	return calc.cv(h, result, filter)

	def _makeCalc(self, h):
	raise NotImplementedError


	class BinnedRLCVCalc1D(_BinnedCVCalc1D):
	def __init__(self, regParameter, effectiveSampleSize=None, weightedSample=None):
	assert regParameter >= 0.0, "RLCV regularisation parameter can not be negative"
	_BinnedCVCalc1D.__init__(self, effectiveSampleSize, weightedSample)
	self._regParameter = regParameter

	def _makeCalc(self, h):
	return BandwidthCVPseudoLogli1D(histoBinType(h), self._regParameter)


	class BinnedLSCVCalc1D(_BinnedCVCalc1D):
	def __init__(self, effectiveSampleSize=None, weightedSample=None):
	_BinnedCVCalc1D.__init__(self, effectiveSampleSize, weightedSample)

	def _makeCalc(self, h):
	return BandwidthCVLeastSquares1D(histoBinType(h))


	class _DiscreteDEBase1D:
	"""
	Base class for 1-d density estimation on regular grids
	"""
	def __init__(self, prototypeHisto, i_allowNegativeDensities):
	if prototypeHisto.dim() != 1:
	raise ValueError("Expected 1-d histogram as a prototype")
	if not prototypeHisto.isUniformlyBinned():
	raise ValueError("Expected uniformy binned histogram as a prototype")
	self._allowNegative = i_allowNegativeDensities
	axis = prototypeHisto.axis(0)
	self._nBins = axis.nBins()
	self._binWidth = axis.binWidth(0)

	def expectedNBins(self):
	return self._nBins

	def expectedBinWidth(self):
	return self._binWidth

	def allowNegativeDensities(self, b):
	self._allowNegative = b

	def areNegativeDensitiesAllowed(self):
	return self._allowNegative

	def isCompatible(self, h):
	if h.dim() != 1:
	return False
	if not h.isUniformlyBinned():
	return False
	axis = h.axis(0)
	return self._nBins == axis.nBins() and self._binWidth == axis.binWidth(0)

	def validateData(self, h):
	if not self.isCompatible(h):
	raise ValueError("Incompatible input histogram")

	def setData(self, h):
	raise NotImplementedError

	def isDataSet(self):
	raise NotImplementedError

	def estimateDataDensity(self, filterDegree, bandwidth=-1.0,
	symbetaPower=-1, rememberFilter=True):
	raise NotImplementedError

	def estimateDensity(self, h, filterDegree, bandwidth=-1.0,
	symbetaPower=-1, rememberFilter=True):
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	self.setData(h)
	return self.estimateDataDensity(filterDegree, bandwidth,
	symbetaPower, rememberFilter)

	def addFilter(self, filterDegree, bandwidth=-1.0, symbetaPower=-1):
	raise NotImplementedError

	def nFiltersAdded(self):
	raise NotImplementedError

	def clearFilters(self):
	raise NotImplementedError

	def bestFilterByCV(self, h, cvCalc):
	raise NotImplementedError


	class FFTKDE1D(_DiscreteDEBase1D):
	"""
	Fast 1-d KDE on regular grids utilizing FFT for convolutions
	"""
	def __init__(self, prototypeHisto, allowNegativeResults=True,
	useMirroring=True):
	_DiscreteDEBase1D.__init__(self, prototypeHisto, allowNegativeResults)
	self._mirror = useMirroring
	self._engine = npstat.ConvolutionEngine1D(2*self._nBins)
	self._kernelTable = dict()
	self._kernelProperties = list()
	self._directId = 2147483647
	self._directKernelKey = (0.0, -1.0, 0.0)
	# Add more constructor parameters if you want to
	# change the settings used below
	self._bh = npstat.BoundaryHandling("BM_TRUNCATE")
	self._gaussKernelNSigmas = 12.0

	def _makeKernel(self, filterDegree, bandwidth, symbetaPower):
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	assert bandwidth > 0.0, "Kernel bandwidth must be positive"
	if symbetaPower < 0.0:
	distro = npstat.TruncatedGauss1D(0.0, bandwidth, self._gaussKernelNSigmas)
	else:
	distro = npstat.SymmetricBeta1D(0.0, bandwidth, symbetaPower)
	builder = npstat.getBoundaryFilter1DBuilder(self._bh, distro, self._binWidth)
	taper = npstat.contDegreeTaper(filterDegree)
	kernelCenter = self._nBins - 1
	pf = builder.makeFilter(taper, len(taper)-1, kernelCenter, 2*self._nBins)
	- selfContrib = pf.valueAt(pf.peakPosition())
	- return (npstat.polyFilter1DToNumpy(pf, kernelCenter, 2*self._nBins), selfContrib)
	+ selfContribution = pf.valueAt(pf.peakPosition())
	+ return (npstat.polyFilter1DToNumpy(pf, kernelCenter, 2*self._nBins),
	+ selfContribution)

	def addFilter(self, filterDegree, bandwidth, symbetaPower):
	"""
	Add a kernel for future use in cross-validation
	"""
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	key = (filterDegree, bandwidth, symbetaPower)
	if key not in self._kernelTable:
	id = len(self._kernelProperties)
	assert id < self._directId, "Too many kernels"
	k, sc = self._makeKernel(filterDegree, bandwidth, symbetaPower)
	self._engine.depositFilter(id, k, self._nBins+1)
	self._kernelTable[key] = id
	self._kernelProperties.append((filterDegree, bandwidth, symbetaPower, sc))
	return self._kernelTable[key]

	def nFiltersAdded(self):
	sz = len(self._kernelProperties)
	assert sz == self._engine.filterBankSize(), "Filter counting failure"
	return sz

	def clearFilters(self):
	self._kernelTable = dict()
	self._kernelProperties = list()
	self._engine.clearFilterBank();
	self._directKernelKey = (0.0, -1.0, 0.0)

	def _getKernelId(self, filterDegree, bandwidth, symbetaPower, addKern):
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	assert bandwidth > 0.0, "Kernel bandwidth must be positive"
	if addKern:
	return self.addFilter(filterDegree, bandwidth, symbetaPower)
	key = (filterDegree, bandwidth, symbetaPower)
	if key in self._kernelTable:
	return self._kernelTable[key]
	if key != self._directKernelKey:
	k, sc = self._makeKernel(filterDegree, bandwidth, symbetaPower)
	self._engine.depositFilter(self._directId, k, self._nBins+1)
	self._directKernelKey = key
	return self._directId

	def setData(self, h):
	self.validateData(h)
	self._engine.setFilter(npstat.doubleHistoBins1D(h, True, self._mirror))

	def isDataSet(self):
	return self._engine.isFilterSet()

	def _buildResult(self, id, makeNonNegative):
	arr = self._engine.convolveFilterAndDeposit(id, makeNonNegative)
	result = np.resize(arr, self._nBins)
	# The result we have now is normalized to the histogram area
	# in case of mirroring, and potentially has some leakage
	# if there is no mirroring. Renormalize everything to 1.
	area = np.sum(result)*self._binWidth
	assert area > 0.0, "Result can not be normalized. "\
	"Was the input histogram filled?"
	result /= area
	return result

	def estimateDataDensity(self, filterDegree, bandwidth, symbetaPower, addKern=True):
	assert self._engine.isFilterSet(), "Data is not set"
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	id = self._getKernelId(filterDegree, bandwidth, symbetaPower, addKern)
	makeNonNegative = (filterDegree == 0.0 or (not self._allowNegative))
	return self._buildResult(id, makeNonNegative)

	def bestFilterByCV(self, h, cvCalc):
	"""
	Choose the best kernel among kernels added previsously utilizing
	cross-validation
	"""
	assert self.nFiltersAdded() > 0, "No kernels added"
	self.setData(h)
	bestCVValue = -1.7e308
	bestId = -1
	bestResult = None
	for id, properties in enumerate(self._kernelProperties):
	filterDegree, bandwidth, symbetaPower, sc = properties
	makeNonNegative = (filterDegree == 0.0 or (not self._allowNegative))
	result = self._buildResult(id, makeNonNegative)
	dummyFilter = npstat.ContribConstant1D(sc, h.nBins())
	cvValue = cvCalc(h, result, dummyFilter)
	if cvValue > bestCVValue:
	bestCVValue = cvValue
	bestId = id
	bestResult = result
	assert bestId >= 0, "Kernel optimization scan failed"
	filterDegree, bandwidth, symbetaPower, sc = self._kernelProperties[bestId]
	key = (filterDegree, bandwidth, symbetaPower)
	return (bestCVValue, key, bestResult)


	class LOrPE1D(_DiscreteDEBase1D):
	"""
	Density estimation by LOrPE on regular 1-d grids
	"""
	def __init__(self, prototypeHisto, allowNegativeResults=True, bh = None):
	_DiscreteDEBase1D.__init__(self, prototypeHisto, allowNegativeResults)
	self._bh = bh
	if bh is None:
	self._bh = npstat.BoundaryHandling("BM_FOLD")
	self._data = None
	self._filterTable = dict()

	def setData(self, h):
	self.validateData(h)
	self._data = npstat.doubleHistoBins1D(h)

	def isDataSet(self):
	return self._data is not None

	def _makeFilter(self, filterDegree, bandwidth, symbetaPower):
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	return npstat.symbetaLOrPEFilter1D(
	symbetaPower, bandwidth, filterDegree,
	self._nBins, self._binWidth, self._bh)

	def estimateDataDensity(self, filterDegree, bandwidth,
	symbetaPower, rememberFilter=True):
	assert self.isDataSet(), "Data is not set"
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	key = (filterDegree, bandwidth, symbetaPower)
	if key in self._filterTable:
	filt = self._filterTable[key]
	elif rememberFilter:
	filt = self.addFilter(filterDegree, bandwidth, symbetaPower)
	else:
	filt = self._makeFilter(filterDegree, bandwidth, symbetaPower)
	makeNonNegative = (filterDegree == 0.0 or (not self._allowNegative))
	result = filt.filter(self._data, makeNonNegative)
	area = np.sum(result)*self._binWidth
	assert area > 0.0, "Result can not be normalized. "\
	"Was the input histogram filled?"
	result /= area
	return result

	def addFilter(self, filterDegree, bandwidth, symbetaPower):
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	key = (filterDegree, bandwidth, symbetaPower)
	if key in self._filterTable:
	filter = self._filterTable[key]
	else:
	filter = self._makeFilter(filterDegree, bandwidth, symbetaPower)
	self._filterTable[key] = filter
	return filter

	def nFiltersAdded(self):
	return len(self._filterTable)

	def clearFilters(self):
	self._filterTable = dict()

	def bestFilterByCV(self, h, cvCalc):
	assert self.nFiltersAdded() > 0, "No filters added"
	self.setData(h)
	bestCVValue = -1.7e308
	bestKey = (0.0, -1.0, 0.0)
	bestResult = None
	buffer = np.empty((self._nBins,), dtype=np.double)
	for key, filt in self._filterTable.items():
	filterDegree, bandwidth, symbetaPower = key
	makeNonNegative = (filterDegree == 0.0 or (not self._allowNegative))
	filt.filterIntoBuffer(self._data, buffer, makeNonNegative)
	area = np.sum(buffer)*self._binWidth
	assert area > 0.0, "Result can not be normalized. "\
	"Was the input histogram filled?"
	buffer /= area
	cvValue = cvCalc(h, buffer, filt)
	if cvValue > bestCVValue:
	bestCVValue = cvValue
	bestKey = key
	bestResult = np.copy(buffer)
	assert bestResult is not None, "Filter optimization scan failed"
	return (bestCVValue, bestKey, bestResult)


	class DiscreteOSDE1D(_DiscreteDEBase1D):
	"""
	OSDE on regular 1-d grids
	"""
	- def __init__(self, prototypeHisto, allowNegativeResults=True, maxdeg=50):
	+ def __init__(self, prototypeHisto, allowNegativeResults=True, maxdeg=50,
	+ weightFunction=None):
	_DiscreteDEBase1D.__init__(self, prototypeHisto, allowNegativeResults)
	assert maxdeg >= 0, "Filter degree can not be negative"
	self._data = None
	self._filterTable = dict()
	- self._maxdeg = max(self._nBins-1, maxdeg)
	- self._poly = npstat.OrthoPoly1D(self._maxdeg, np.ones(self._maxdeg+1), self._binWidth)
	+ self._maxdeg = min(self._nBins-1, maxdeg)
	+ wF = weightFunction
	+ if wF is None:
	+ wF = np.ones(self._nBins)
	+ else:
	+ assert len(wF) == self._nBins, "Incompatible number of weights"
	+ self._poly = npstat.OrthoPoly1D(self._maxdeg, wF, self._binWidth)
	+
	+ def maxDegree(self):
	+ return self._maxdeg

	def setData(self, h):
	self.validateData(h)
	self._data = np.empty((self._maxdeg+1,), dtype=np.double)
	self._poly.calculateCoeffs(npstat.doubleHistoBins1D(h), self._data)

	def isDataSet(self):
	return self._data is not None

	def _makeFilter(self, filterDegree):
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	taper = npstat.contDegreeTaper(filterDegree)
	if len(taper) > self._maxdeg+1:
	raise ValueError("Filter degree is too large")
	coeffs = np.array(taper, dtype=np.double)
	response = np.empty((self._nBins,), dtype=np.double)
	self._poly.globalFilterDiag(coeffs, response)
	contrib = npstat.ContribVector1D(response)
	return (coeffs, contrib)

	def estimateDataDensity(self, filterDegree, rememberFilter=True):
	assert self.isDataSet(), "Data is not set"
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	key = filterDegree
	if key in self._filterTable:
	taper, contrib = self._filterTable[key]
	elif rememberFilter:
	taper, contrib = self.addFilter(filterDegree)
	else:
	taper, contrib = self._makeFilter(filterDegree)
	coeffs = np.array([a*b for a,b in zip(taper,self._data)], dtype=np.double)
	result = np.empty((self._nBins,), dtype=np.double)
	self._poly.allSeries(coeffs, result, not self._allowNegative)
	area = np.sum(result)*self._binWidth
	assert area > 0.0, "Result can not be normalized. "\
	"Was the input histogram filled?"
	result /= area
	return result

	def estimateDensity(self, h, filterDegree, rememberFilter=True):
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	self.setData(h)
	return self.estimateDataDensity(filterDegree, rememberFilter)

	def addFilter(self, filterDegree):
	assert filterDegree >= 0.0, "Filter degree can not be negative"
	key = filterDegree
	if key in self._filterTable:
	filter = self._filterTable[key]
	else:
	filter = self._makeFilter(filterDegree)
	self._filterTable[key] = filter
	return filter

	def nFiltersAdded(self):
	return len(self._filterTable)

	def clearFilters(self):
	self._filterTable = dict()

	def bestFilterByCV(self, h, cvCalc):
	assert self.nFiltersAdded() > 0, "No filters added"
	self.setData(h)
	bestCVValue = -1.7e308
	bestKey = -1.0
	bestResult = None
	buffer = np.empty((self._nBins,), dtype=np.double)
	for key, filt in self._filterTable.items():
	taper, contrib = filt
	coeffs = np.array([a*b for a,b in zip(taper,self._data)], dtype=np.double)
	self._poly.allSeries(coeffs, buffer, not self._allowNegative)
	area = np.sum(buffer)*self._binWidth
	assert area > 0.0, "Result can not be normalized. "\
	"Was the input histogram filled?"
	buffer /= area
	cvValue = cvCalc(h, buffer, contrib)
	if cvValue > bestCVValue:
	bestCVValue = cvValue
	bestKey = key
	bestResult = np.copy(buffer)
	assert bestResult is not None, "Filter optimization scan failed"
	return (bestCVValue, bestKey, bestResult)
	Index: trunk/examples/Python/lorpe_1d_cv.py
	===================================================================
	--- trunk/examples/Python/lorpe_1d_cv.py (revision 628)
	+++ trunk/examples/Python/lorpe_1d_cv.py (revision 629)
	@@ -1,120 +1,119 @@
	#!/usr/bin/env python3
	"""
	This example illistrates how to choose LOrPE filter settings
	in 1-d by cross-validation
	"""

	__author__="Igor Volobouev (i.volobouev@ttu.edu)"
	__version__="1.0"
	__date__ ="Sep 19 2019"

	# Perform necessary imports
	import npstat as ns
	from npstat_utils import HistoND
	from npstat_density_estimation_1d import LOrPE1D, FFTKDE1D, \
	histoPluginBandwidth1D, BinnedLSCVCalc1D, BinnedRLCVCalc1D
	import matplotlib.pyplot as plt
	import numpy as np
	import math

	# Some parameters for the script
	sampleSize = 1000 # Number of points to generate (sample size)
	xmin = 0.0 # Left edge of the sample histogram
	xmax = 5.0 # Right edge of the sample histogram
	numBins = 100 # Number of bins for the sample histogram
	nDiscrete = 500 # Number of cells for sample discretization.
	# Should be large enough so that the cell width
	# is substantially smaller than any physical scale.
	symbetaPower = 4 # Power parameter of the symmetric beta kernel.
	# Specify -1 to use a Gaussian kernel instead.
	pluginDegree = 2 # Degree of polynomial used to generate a simple
	# plug-in estimate for comparison with cross-validation

	# Here, we will use the exponential distribution truncated to [xmin, xmax]
	# in order to generate the random sample
	d1 = ns.TruncatedDistribution1D(ns.Exponential1D(0.0, 1.0), xmin, xmax)

	# Generate a random sample according to this density
	rng = ns.MersenneTwister()
	sample = d1.generate(rng, sampleSize)

	# Histogram for displaying the sample
	h1 = HistoND("int", ns.HistoAxis(numBins, xmin, xmax, "X"),
	"Sample Histogram", "Counts")
	any(h1.fill(x, 1) for x in sample)

	# Histogram for sample discretization
	h2 = HistoND("int", ns.HistoAxis(nDiscrete, xmin, xmax, "X"),
	"LOrPE Discretization Histogram", "Counts")
	any(h2.fill(x, 1) for x in sample)

	# Object which will perform LOrPE
	smoother = LOrPE1D(h2)

	# Add a simple density estimate with a plug-in bandwidth
	bw0 = histoPluginBandwidth1D(h2, sampleSize, pluginDegree, symbetaPower)
	print("Plug-in bandwidth is {:.4f} for degree {}".format(bw0, pluginDegree))
	yPlugin = smoother.estimateDensity(h2, pluginDegree, bw0, symbetaPower)

	# Add vanilla KDE for comparison (without mirroring and high order kernels)
	kde = FFTKDE1D(h2, False, False)
	bwkde = histoPluginBandwidth1D(h2, sampleSize, 0, symbetaPower)
	yKDE = kde.estimateDensity(h2, 0, bwkde, symbetaPower)
	print("KDE bandwidth is {:.4f}".format(bwkde))

	# Add a number of filters with different filter degrees and bandwidth
	# settings for cross-validation calculations
	for filterDegree in range(1, 7):
	bw0 = histoPluginBandwidth1D(h2, sampleSize, filterDegree, symbetaPower)
	print("Building degree {} filters, plug-in bandwidth is {:.4f}".format(
	filterDegree, bw0))
	for bw in np.logspace(math.log10(bw0/4.0), math.log10(bw0*4.0), 50):
	smoother.addFilter(filterDegree, bw, symbetaPower)

	# Run the least squares cross-validation (LSCV)
	lscvCalc = BinnedLSCVCalc1D()
	bestLSCV, paramsLSCV, yLSCV = smoother.bestFilterByCV(h2, lscvCalc)
	lscvDegree, lscvBandwidth, lscvSymbetaPower = paramsLSCV
	print("LSCV: bandwidth is {:.4f}, degree is {}".format(lscvBandwidth, lscvDegree))

	# Run the regularized likelihood cross-validation (RLCV)
	regularisationParameter = 0.5
	rlcvCalc = BinnedRLCVCalc1D(regularisationParameter)
	bestRLCV, paramsRLCV, yRLCV = smoother.bestFilterByCV(h2, rlcvCalc)
	rlcvDegree, rlcvBandwidth, rlcvSymbetaPower = paramsRLCV
	print("RLCV: bandwidth is {:.4f}, degree is {}".format(rlcvBandwidth, rlcvDegree))

	# Create Matplotlib plot window
	fig = plt.figure()
	ax = fig.add_subplot(111)

	# Plot the sample histogram
	xh, yh = ns.histoOutline1D(h1)
	ax.plot(xh, yh, 'k')

	# Overlay the original density. Scale all densities so that their
	# plots are compatible with the histogram height.
	verticalScale = h1.binVolume()*sampleSize
	xdens, ydens = ns.scanDensity1D(d1, xmin, xmax, nDiscrete+1)
	ax.plot(xdens, ydens*verticalScale, 'g', label='True density')

	# Overlay the KDE
	coords = h2.axis(0).binCenters()
	ax.plot(coords, yKDE*verticalScale, 'm', label="KDE")

	# Overlay the LOrPE results with various bandwidth values
	-bandwidths = (bw0, rlcvBandwidth, lscvBandwidth)
	colors = ('c', 'r', 'b' )
	labels = ('Plug-in', 'RLCV', 'LSCV' )
	results = (yPlugin, yRLCV, yLSCV )
	-for bw, color, l, y in zip(bandwidths, colors, labels, results):
	+for color, l, y in zip(colors, labels, results):
	ax.plot(coords, y*verticalScale, color, label="{} estimate".format(l))

	# Add labels and comments
	plt.xlabel('X')
	plt.ylabel('Density estimate')
	plt.title('LOrPE with Cross-Validation')
	plt.legend()

	# Display the window
	plt.show()
	Index: trunk/examples/Python/discrete_osde_1d.py
	===================================================================
	--- trunk/examples/Python/discrete_osde_1d.py (revision 0)
	+++ trunk/examples/Python/discrete_osde_1d.py (revision 629)
	@@ -0,0 +1,103 @@
	+#!/usr/bin/env python3
	+"""
	+This example illistrates how to perform univariate orthogonal series
	+density estimation (OSDE) on equdistant grids using NPStat
	+"""
	+
	+__author__="Igor Volobouev (i.volobouev@ttu.edu)"
	+__version__="1.0"
	+__date__ ="Sep 20 2019"
	+
	+# Perform necessary imports
	+import npstat as ns
	+from npstat_utils import HistoND
	+from npstat_density_estimation_1d import DiscreteOSDE1D, \
	+ BinnedLSCVCalc1D, BinnedRLCVCalc1D
	+import matplotlib.pyplot as plt
	+import numpy as np
	+
	+# Some parameters for the script
	+sampleSize = 500 # Number of points to generate (sample size)
	+xmin = -3.5 # Left edge of the sample histogram
	+xmax = 3.5 # Right edge of the sample histogram
	+numBins = 70 # Number of bins for the sample histogram
	+nDiscrete = 700 # Number of cells for sample discretization
	+filterDegree = 10 # Initial guess for the OSDE degree
	+
	+# We will use a 1-d Gassian mixture to generate the random sample.
	+# See the "brute_force_kde_1d.py" script for more comments on this.
	+component1 = ns.GaussianMixtureEntry(3.0, -1.0, 0.5)
	+component2 = ns.GaussianMixtureEntry(7.0, 1.0, 0.5)
	+mixture = ns.GaussianMixture1D(0.0, 1.0, (component1, component2))
	+
	+# Generate a random sample according to this density
	+rng = ns.MersenneTwister()
	+sample = mixture.generate(rng, sampleSize)
	+
	+# Histogram for displaying the sample
	+h1 = HistoND("int", ns.HistoAxis(numBins, xmin, xmax, "X"),
	+ "Sample Histogram", "Counts")
	+any(h1.fill(x, 1) for x in sample)
	+
	+# Histogram for sample discretization
	+h2 = HistoND("int", ns.HistoAxis(nDiscrete, xmin, xmax, "X"),
	+ "OSDE Discretization Histogram", "Counts")
	+any(h2.fill(x, 1) for x in sample)
	+
	+# Object which will perform OSDE
	+smoother = DiscreteOSDE1D(h2)
	+
	+# Initial guesstimate
	+yFirst = smoother.estimateDensity(h2, filterDegree)
	+print("Initial guess OSDE degree is ", filterDegree)
	+
	+# Add a number of filters with different degrees for cross-validation
	+# calculations. Note that degrees do not have to be integer.
	+for degree in np.linspace(2.0, 40.0, 77):
	+ smoother.addFilter(degree)
	+
	+# Run the least squares cross-validation (LSCV)
	+lscvCalc = BinnedLSCVCalc1D()
	+bestLSCV, lscvFilterDegree, yLSCV = smoother.bestFilterByCV(h2, lscvCalc)
	+print("LSCV OSDE degree is ", lscvFilterDegree)
	+
	+# Run the regularized likelihood cross-validation (RLCV)
	+regularisationParameter = 0.5
	+rlcvCalc = BinnedRLCVCalc1D(regularisationParameter)
	+bestRLCV, rlcvFilterDegree, yRLCV = smoother.bestFilterByCV(h2, rlcvCalc)
	+print("RLCV OSDE degree is ", rlcvFilterDegree)
	+
	+# We are done with density estimation at this point. The rest of the script
	+# is just plotting results. To begin with, create a Matplotlib plot window.
	+fig = plt.figure()
	+ax = fig.add_subplot(111)
	+
	+# Plot the sample histogram
	+xh, yh = ns.histoOutline1D(h1)
	+ax.plot(xh, yh, 'k')
	+
	+# Overlay the original density. Scale all densities so that their
	+# plots are compatible with the histogram height.
	+verticalScale = h1.binVolume()*sampleSize
	+xdens, ydens = ns.scanDensity1D(mixture, xmin, xmax, nDiscrete+1)
	+ax.plot(xdens, ydens*verticalScale, 'g', label='True density')
	+
	+# Overlay the OSDE results with various degrees
	+xcoords = h2.axis(0).binCenters()
	+colors = ('c', 'r', 'b' )
	+labels = ('Guess', 'RLCV', 'LSCV' )
	+results = (yFirst, yRLCV, yLSCV )
	+for color, l, y in zip(colors, labels, results):
	+ ax.plot(xcoords, y*verticalScale, color, label="{} estimate".format(l))
	+
	+# Set the x range of the plot to that of the sample histogram
	+ax.set_xlim(h1.axis(0).min(), h1.axis(0).max())
	+
	+# Add labels and comments
	+plt.xlabel('X')
	+plt.ylabel('Density estimate')
	+plt.title('Legendre Basis OSDE')
	+plt.legend(loc=2)
	+
	+# Display the window
	+plt.show()

	Property changes on: trunk/examples/Python/discrete_osde_1d.py
	___________________________________________________________________
	Added: svn:executable
	## -0,0 +1 ##
	+*
	\ No newline at end of property
	Index: trunk/examples/Python/kde_1d_by_fft.py
	===================================================================
	--- trunk/examples/Python/kde_1d_by_fft.py (revision 628)
	+++ trunk/examples/Python/kde_1d_by_fft.py (revision 629)
	@@ -1,120 +1,119 @@
	#!/usr/bin/env python3
	"""
	This example illistrates how to perform constant-bandwidth univariate
	kernel density estimation (KDE) utilizing fast Fourier transforms (FFT)
	with the help of the NPStat package.
	"""

	__author__="Igor Volobouev (i.volobouev@ttu.edu)"
	__version__="1.0"
	__date__ ="Sep 19 2019"

	# Perform necessary imports
	import npstat as ns
	from npstat_utils import HistoND
	from npstat_density_estimation_1d import FFTKDE1D, histoPluginBandwidth1D, \
	BinnedLSCVCalc1D, BinnedRLCVCalc1D
	import matplotlib.pyplot as plt
	import numpy as np
	import math

	# Some parameters for the script
	sampleSize = 500 # Number of points to generate (sample size)
	xmin = -3.5 # Left edge of the sample histogram
	xmax = 3.5 # Right edge of the sample histogram
	numBins = 70 # Number of bins for the sample histogram
	nDiscrete = 1024 # Number of cells for sample discretization.
	# Should be a power of 2 (this results in the
	# most efficient application of FFT).
	symbetaPower = -1 # Power parameter of the symmetric beta kernel.
	# Specify -1 to use a Gaussian kernel instead.
	filterDegree = 2 # Degree of polynomial used to generate high-order kernels.
	# In general, even numbers make the most sense here.
	# Kernel order will then be filterDegree + 2. The
	# kernel will be a bona-fide density (i.e., non-negative
	# everywhere) only if filterDegree is 0.

	# We will use a 1-d Gassian mixture to generate the random sample.
	# See the "brute_force_kde_1d.py" script for more comments on this.
	component1 = ns.GaussianMixtureEntry(3.0, -1.0, 0.5)
	component2 = ns.GaussianMixtureEntry(7.0, 1.0, 0.5)
	mixture = ns.GaussianMixture1D(0.0, 1.0, (component1, component2))

	# Generate a random sample according to this density
	rng = ns.MersenneTwister()
	sample = mixture.generate(rng, sampleSize)

	# Histogram for displaying the sample
	h1 = HistoND("int", ns.HistoAxis(numBins, xmin, xmax, "X"),
	"Sample Histogram", "Counts")
	any(h1.fill(x, 1) for x in sample)

	# Histogram for sample discretization. Note that the axis range
	# is increased, to catch all possible tails. Such an increase may
	# or may not make sense, depending on application.
	h2 = HistoND("int", ns.HistoAxis(nDiscrete, 1.5xmin, 1.5xmax, "X"),
	"KDE Discretization Histogram", "Counts")
	any(h2.fill(x, 1) for x in sample)

	# Object which will perform KDE
	smoother = FFTKDE1D(h2)

	# Make an initial guess for the KDE bandwidth. This guess will use
	# a robust measure of the sample scale, estimated from the histogram.
	bw0 = histoPluginBandwidth1D(h2, sampleSize, filterDegree, symbetaPower)
	print("Plug-in bandwidth is {:.4f}".format(bw0))
	yPlugin = smoother.estimateDensity(h2, filterDegree, bw0, symbetaPower)

	# Add a number of kernels with different bandwidth values
	# for cross-validation calculations
	for bw in np.logspace(math.log10(bw0/4.0), math.log10(bw0*2.0), 100):
	smoother.addFilter(filterDegree, bw, symbetaPower)

	# Run the least squares cross-validation (LSCV)
	lscvCalc = BinnedLSCVCalc1D()
	bestLSCV, paramsLSCV, yLSCV = smoother.bestFilterByCV(h2, lscvCalc)
	lscvFilterDegree, lscvBandwidth, lscvSymbetaPower = paramsLSCV
	print("LSCV bandwidth is {:.4f}".format(lscvBandwidth))

	# Run the regularized likelihood cross-validation (RLCV)
	regularisationParameter = 0.5
	rlcvCalc = BinnedRLCVCalc1D(regularisationParameter)
	bestRLCV, paramsRLCV, yRLCV = smoother.bestFilterByCV(h2, rlcvCalc)
	rlcvFilterDegree, rlcvBandwidth, rlcvSymbetaPower = paramsRLCV
	print("RLCV bandwidth is {:.4f}".format(rlcvBandwidth))

	# We are done with density estimation at this point. The rest of the script
	# is just plotting results. To begin with, create a Matplotlib plot window.
	fig = plt.figure()
	ax = fig.add_subplot(111)

	# Plot the sample histogram
	xh, yh = ns.histoOutline1D(h1)
	ax.plot(xh, yh, 'k')

	# Overlay the original density. Scale all densities so that their
	# plots are compatible with the histogram height.
	verticalScale = h1.binVolume()*sampleSize
	xdens, ydens = ns.scanDensity1D(mixture, xmin, xmax, nDiscrete+1)
	ax.plot(xdens, ydens*verticalScale, 'g', label='True density')

	# Overlay the KDE results with various bandwidth values
	xkde = h2.axis(0).binCenters()
	-bandwidths = (bw0, rlcvBandwidth, lscvBandwidth)
	colors = ('c', 'r', 'b' )
	labels = ('Plug-in', 'RLCV', 'LSCV' )
	results = (yPlugin, yRLCV, yLSCV )
	-for bw, color, l, y in zip(bandwidths, colors, labels, results):
	+for color, l, y in zip(colors, labels, results):
	ax.plot(xkde, y*verticalScale, color, label="{} estimate".format(l))

	# Set the x range of the plot to that of the sample histogram
	ax.set_xlim(h1.axis(0).min(), h1.axis(0).max())

	# Add labels and comments
	plt.xlabel('X')
	plt.ylabel('Density estimate')
	plt.title('KDE by FFT')
	plt.legend(loc=2)

	# Display the window
	plt.show()

File Metadata

Mime Type: text/x-diff
Expires: Wed, May 14, 11:04 AM (21 h, 45 s)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 5111337
Default Alt Text: (33 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions