Index: trunk/examples/Python/kde_1d_by_fft.py
===================================================================
--- trunk/examples/Python/kde_1d_by_fft.py	(revision 622)
+++ trunk/examples/Python/kde_1d_by_fft.py	(revision 623)
@@ -1,115 +1,116 @@
#!/usr/bin/env python3
"""
This example illustrates how to perform constant-bandwidth univariate
kernel density estimation (KDE) utilizing fast Fourier transforms (FFT)
with the help of the NPStat package.
"""

__author__="Igor Volobouev (i.volobouev@ttu.edu)"
__version__="1.0"
__date__ ="Sep 19 2019"

# Perform necessary imports
import npstat as ns
from npstat_utils import HistoND, FFTKDE1D, histoPluginBandwidth1D, \
-    BandwidthCVPseudoLogli1D, BandwidthCVLeastSquares1D
+    BinnedLSCVCalc1D, BinnedRLCVCalc1D
import matplotlib.pyplot as plt
import numpy as np
import math

# Some parameters for the script
sampleSize = 500    # Number of points to generate (sample size)
xmin = -3.5         # Left edge of the sample histogram
xmax = 3.5          # Right edge of the sample histogram
numBins = 70        # Number of bins for the sample histogram
nDiscrete = 1024    # Number of cells for sample discretization.
                    # Should be a power of 2 (this results in the
                    # most efficient application of FFT).
symbetaPower = -1   # Power parameter of the symmetric beta kernel.
                    # Specify -1 to use a Gaussian kernel instead.
filterDegree = 2    # Degree of polynomial used to generate high-order
                    # kernels. In general, even numbers make the most
                    # sense here. Kernel order will then be
                    # filterDegree + 2. The kernel will be a bona-fide
                    # density (i.e., non-negative everywhere) only if
                    # filterDegree is 0.

# We will use a 1-d Gaussian mixture to generate the random sample.
# See the "brute_force_kde_1d.py" script for more comments on this.
component1 = ns.GaussianMixtureEntry(3.0, -1.0, 0.5)
component2 = ns.GaussianMixtureEntry(7.0, 1.0, 0.5)
mixture = ns.GaussianMixture1D(0.0, 1.0, (component1, component2))

# Generate a random sample according to this density
rng = ns.MersenneTwister()
sample = mixture.generate(rng, sampleSize)

# Histogram for displaying the sample
h1 = HistoND("int", ns.HistoAxis(numBins, xmin, xmax, "X"),
             "Sample Histogram", "Counts")
any(h1.fill(x, 1) for x in sample)

# Histogram for sample discretization
h2 = HistoND("int", ns.HistoAxis(nDiscrete, 2*xmin, 2*xmax, "X"),
             "KDE Discretization Histogram", "Counts")
any(h2.fill(x, 1) for x in sample)

# Object which will perform KDE
smoother = FFTKDE1D(h2)
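# Editorial aside (not part of the original script): the FFTKDE1D wrapper
# defined later in this patch remembers the binning of its prototype
# histogram, and its "isCompatible" method can be used to verify that any
# histogram passed to it later uses the same axis.
assert smoother.isCompatible(h2), "Histogram binning mismatch"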
# Make an initial guess for the KDE bandwidth. It will use
# a robust measure of the sample scale, estimated from the histogram.
bw0 = histoPluginBandwidth1D(h2, sampleSize, symbetaPower, filterDegree)
print("Plug-in bandwidth is", bw0)
yPlugin = smoother.estimateDensity(h2, symbetaPower, filterDegree, bw0)

# Add a number of kernels with different bandwidth values
# for cross-validation calculations
for bw in np.logspace(math.log10(bw0/4.0), math.log10(bw0*2.0), 100):
    smoother.addKernel(symbetaPower, filterDegree, bw)

# Run the least squares cross-validation (LSCV)
-bestLSCV, paramsLSCV, yLSCV = smoother.bestKernelCV(h2, sampleSize, "LSCV")
-lscvBandwidth = paramsLSCV[2]
+lscvCalc = BinnedLSCVCalc1D()
+bestLSCV, paramsLSCV, yLSCV = smoother.bestKernelByCV(h2, lscvCalc)
+lscvSymbetaPower, lscvFilterDegree, lscvBandwidth = paramsLSCV
print("LSCV bandwidth is", lscvBandwidth)

# Run the regularized likelihood cross-validation (RLCV)
regularisationParameter = 0.5
-bestRLCV, paramsRLCV, yRLCV = smoother.bestKernelCV(h2, sampleSize, "RLCV",
-                                                    regularisationParameter)
-rlcvBandwidth = paramsRLCV[2]
+rlcvCalc = BinnedRLCVCalc1D(regularisationParameter)
+bestRLCV, paramsRLCV, yRLCV = smoother.bestKernelByCV(h2, rlcvCalc)
+rlcvSymbetaPower, rlcvFilterDegree, rlcvBandwidth = paramsRLCV
print("RLCV bandwidth is", rlcvBandwidth)

# Create Matplotlib plot window
fig = plt.figure()
ax = fig.add_subplot(111)

# Plot the sample histogram
xh, yh = ns.histoOutline1D(h1)
ax.plot(xh, yh, 'k')

# Overlay the original density. Scale all densities so that their
# plots are compatible with the histogram height.
verticalScale = h1.binVolume()*sampleSize
xdens, ydens = ns.scanDensity1D(mixture, xmin, xmax, nDiscrete+1)
ax.plot(xdens, ydens*verticalScale, 'g', label='True density')

# Overlay the KDE results with various bandwidth values
xkde = h2.axis(0).binCenters()
bandwidths = (bw0,       rlcvBandwidth, lscvBandwidth)
colors     = ('c',       'r',           'b'          )
labels     = ('Plug-in', 'RLCV',        'LSCV'       )
results    = (yPlugin,   yRLCV,         yLSCV        )
for bw, color, l, y in zip(bandwidths, colors, labels, results):
    plt.plot(xkde, y*verticalScale, color, label="{} estimate".format(l))

# Set the x range of the plot to that of the sample histogram
ax.set_xlim(h1.axis(0).min(), h1.axis(0).max())

# Add labels and comments
plt.xlabel('X')
plt.ylabel('Density estimate')
plt.title('KDE by FFT')
plt.legend(loc=2)

# Display the window
plt.show()
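# A possible extension (editorial illustration, not part of the original
# example): each estimate returned by the smoother is renormalized to unit
# area on the discretization grid, which is easy to verify numerically
# using the variables defined above:
for name, y in zip(labels, results):
    mass = np.sum(y)*h2.binVolume()
    print("{} estimate integrates to {:.6f}".format(name, mass))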
""" __author__ = "Igor Volobouev (i.volobouev@ttu.edu)" __version__ = "2.1" __date__ = "Sep 15 2019" import ast import npstat import configparser from configparser import NoSectionError, NoOptionError import numpy as np _StorableNone = npstat.StorableNone() def arrayNDFromNumpy(a): 'Converts multidimensional arrays from "numpy" representation to "npstat"' funcs = {'bool' : npstat.arrayNDFromNumpyBool, 'int' : npstat.arrayNDFromNumpyInt, 'int8' : npstat.arrayNDFromNumpyInt, 'int16' : npstat.arrayNDFromNumpyInt, 'int32' : npstat.arrayNDFromNumpyInt, 'int64' : npstat.arrayNDFromNumpyLLong, 'uint8' : npstat.arrayNDFromNumpyUChar, 'uint16' : npstat.arrayNDFromNumpyInt, 'uint32' : npstat.arrayNDFromNumpyLLong, 'uint64' : npstat.arrayNDFromNumpyLLong, 'float' : npstat.arrayNDFromNumpyDouble, 'float16' : npstat.arrayNDFromNumpyFloat, 'float32' : npstat.arrayNDFromNumpyFloat, 'float64' : npstat.arrayNDFromNumpyDouble, 'complex' : npstat.arrayNDFromNumpyCDouble, 'complex64' : npstat.arrayNDFromNumpyCFloat, 'complex128' : npstat.arrayNDFromNumpyCDouble} return funcs[str(a.dtype)](a) # The opposite conversion, from "npstat" to "numpy", can be performed # by functions "arrayNDToNumpy" and "matrixToNumpy" implemented as # SWIG wrappers. These functions are properly overloaded by SWIG so # they do not need a special python-level typemap, like the one employed # in "arrayNDFromNumpy". def HistoND(typename, *args): """ This function creates multidimensional histograms with uniform bins. The first argument is the histogram bin type. Same bin type can be named according to three different naming conventions. I suggest using one of these conventions consistently throughout your application code: Numpy convention Npstat convention C++ convention ---------------- ----------------- ------------------------------- uint8 UChar unsigned char int32 Int int int64 LLong long long float32 Float float float64 Double double N/A StatAcc StatAccumulator N/A WStatAcc WeightedStatAccumulator N/A BinSummary BinSummary N/A FSampleAcc FloatSampleAccumulator N/A DSampleAcc DoubleSampleAccumulator N/A DWSampleAcc DoubleWeightedSampleAccumulator Note that, in numpy, "float" is a shorthand for "float64". This is not the case here: "float" will be interpreted according to the C++ convention. The second HistoND argument is a collection of "HistoAxis" objects which specifies the histogram binning. The histogram dimensionality will be defined by the number of axes provided in this collection. The largest allowed histogram dimensionality is 31 on 32-bit architectures and 63 on 64-bit architectures. The optional third argument specifies the histogram title for subsequent use by plotting routines. The optional fourth argument specifies the label for the histogram bin contents. Here is an example which creates a 2-d histogram of unsigned integer bins: from npstat import HistoAxis from npstat_utils import HistoND axes = (HistoAxis(20, 0.0, 1.0, "X"), HistoAxis(30, -2.0, 3.0, "Y")) histo = HistoND("Int", axes, "An example 2-d histogram", "Counts") There will be 20 bins in X (first dimension) in the interval [0.0, 1.0) and 30 bins in Y (second dimension) in the interval [-2.0, 3.0). The histogram title will be set to "An example 2-d histogram" and the histogram bin label will be set to "Counts". There is an alternative way to build a uniformly binned histogram, useful when the bounds of the region which will be covered by the histogram are represented by the "DoubleBoxND" object. 
def HistoND(typename, *args):
    """
    This function creates multidimensional histograms with uniform bins.
    The first argument is the histogram bin type. The same bin type can be
    named according to three different naming conventions. I suggest using
    one of these conventions consistently throughout your application code:

    Numpy convention   Npstat convention   C++ convention
    ----------------   -----------------   -------------------------------
    uint8              UChar               unsigned char
    int32              Int                 int
    int64              LLong               long long
    float32            Float               float
    float64            Double              double
    N/A                StatAcc             StatAccumulator
    N/A                WStatAcc            WeightedStatAccumulator
    N/A                BinSummary          BinSummary
    N/A                FSampleAcc          FloatSampleAccumulator
    N/A                DSampleAcc          DoubleSampleAccumulator
    N/A                DWSampleAcc         DoubleWeightedSampleAccumulator

    Note that, in numpy, "float" is a shorthand for "float64". This is not
    the case here: "float" will be interpreted according to the C++
    convention.

    The second HistoND argument is a collection of "HistoAxis" objects
    which specifies the histogram binning. The histogram dimensionality
    will be defined by the number of axes provided in this collection.
    The largest allowed histogram dimensionality is 31 on 32-bit
    architectures and 63 on 64-bit architectures.

    The optional third argument specifies the histogram title for
    subsequent use by plotting routines. The optional fourth argument
    specifies the label for the histogram bin contents. Here is an example
    which creates a 2-d histogram of integer bins:

    from npstat import HistoAxis
    from npstat_utils import HistoND

    axes = (HistoAxis(20, 0.0, 1.0, "X"), HistoAxis(30, -2.0, 3.0, "Y"))
    histo = HistoND("Int", axes, "An example 2-d histogram", "Counts")

    There will be 20 bins in X (first dimension) in the interval [0.0, 1.0)
    and 30 bins in Y (second dimension) in the interval [-2.0, 3.0).
    The histogram title will be set to "An example 2-d histogram" and the
    histogram bin label will be set to "Counts".

    There is an alternative way to build a uniformly binned histogram,
    useful when the bounds of the region which will be covered by the
    histogram are represented by the "DoubleBoxND" object. In this case
    one simply needs to provide the number of bins in each dimension,
    as illustrated in the example below:

    from npstat import DoubleInterval, DoubleBoxND
    from npstat_utils import HistoND

    box = DoubleBoxND()
    box.push_back(DoubleInterval(0.0, 1.0))
    box.push_back(DoubleInterval(-2.0, 3.0))
    nbins = (20, 30)
    histo = HistoND("Int", nbins, box, "An example 2-d histogram", "Counts")

    This creates a histogram which is almost identical to the one created
    in the previous example. The only difference is that the axes of this
    histogram are currently unlabeled. To assign the labels (or to replace
    them at any future time), you can use the "setAxisLabel" method of the
    histogram object:

    histo.setAxisLabel(0, "X")
    histo.setAxisLabel(1, "Y")
    """
    funcs = {'uint8' : npstat.UCharHistoND, 'UChar' : npstat.UCharHistoND,
             'unsigned char' : npstat.UCharHistoND,
             'int32' : npstat.IntHistoND, 'Int' : npstat.IntHistoND,
             'int' : npstat.IntHistoND,
             'int64' : npstat.LLongHistoND, 'LLong' : npstat.LLongHistoND,
             'long long' : npstat.LLongHistoND,
             'long long int' : npstat.LLongHistoND,
             'float32' : npstat.FloatHistoND, 'Float' : npstat.FloatHistoND,
             'float' : npstat.FloatHistoND,
             'float64' : npstat.DoubleHistoND, 'Double' : npstat.DoubleHistoND,
             'double' : npstat.DoubleHistoND,
             'StatAccumulator' : npstat.StatAccHistoND,
             'npstat::StatAccumulator' : npstat.StatAccHistoND,
             'StatAcc' : npstat.StatAccHistoND,
             'WeightedStatAccumulator' : npstat.WStatAccHistoND,
             'npstat::WeightedStatAccumulator' : npstat.WStatAccHistoND,
             'WStatAcc' : npstat.WStatAccHistoND,
             'BinSummary' : npstat.BinSummaryHistoND,
             'npstat::BinSummary' : npstat.BinSummaryHistoND,
             'FloatSampleAccumulator' : npstat.FSampleAccHistoND,
             'npstat::FloatSampleAccumulator' : npstat.FSampleAccHistoND,
             'FSampleAcc' : npstat.FSampleAccHistoND,
             'DoubleSampleAccumulator' : npstat.DSampleAccHistoND,
             'npstat::DoubleSampleAccumulator' : npstat.DSampleAccHistoND,
             'DSampleAcc' : npstat.DSampleAccHistoND,
             'DoubleWeightedSampleAccumulator' : npstat.DWSampleAccHistoND,
             'npstat::DoubleWeightedSampleAccumulator' : npstat.DWSampleAccHistoND,
             'DWSampleAcc' : npstat.DWSampleAccHistoND}
    return funcs[typename](*args)
""" funcs = {'uint8' : npstat.UCharNUHistoND, 'UChar' : npstat.UCharNUHistoND, 'unsigned char' : npstat.UCharNUHistoND, 'int32' : npstat.IntNUHistoND, 'Int' : npstat.IntNUHistoND, 'int' : npstat.IntNUHistoND, 'int64' : npstat.LLongNUHistoND, 'LLong' : npstat.LLongNUHistoND, 'long long' : npstat.LLongNUHistoND, 'long long int' : npstat.LLongNUHistoND, 'float32' : npstat.FloatNUHistoND, 'Float' : npstat.FloatNUHistoND, 'float' : npstat.FloatNUHistoND, 'float64' : npstat.DoubleNUHistoND, 'Double' : npstat.DoubleNUHistoND, 'double' : npstat.DoubleNUHistoND, 'StatAccumulator' : npstat.StatAccNUHistoND, 'npstat::StatAccumulator' : npstat.StatAccNUHistoND, 'StatAcc' : npstat.StatAccNUHistoND, 'WeightedStatAccumulator' : npstat.WStatAccNUHistoND, 'npstat::WeightedStatAccumulator' : npstat.WStatAccNUHistoND, 'WStatAcc' : npstat.WStatAccNUHistoND, 'BinSummary' : npstat.BinSummaryNUHistoND, 'npstat::BinSummary' : npstat.BinSummaryNUHistoND, 'FloatSampleAccumulator' : npstat.FSampleAccNUHistoND, 'npstat::FloatSampleAccumulator' : npstat.FSampleAccNUHistoND, 'FSampleAcc' : npstat.FSampleAccNUHistoND, 'DoubleSampleAccumulator' : npstat.DSampleAccNUHistoND, 'npstat::DoubleSampleAccumulator' : npstat.DSampleAccNUHistoND, 'DSampleAcc' : npstat.DSampleAccNUHistoND, 'DoubleWeightedSampleAccumulator' : npstat.DWSampleAccNUHistoND, 'npstat::DoubleWeightedSampleAccumulator' : npstat.DWSampleAccNUHistoND, 'DWSampleAcc' : npstat.DWSampleAccNUHistoND} return funcs[typename](*args) def DAHistoND(typename, *args): """ This function creates multidimensional histograms with axes that can be uniform in some dimensions and non-uniform in others. The function signature is almost identical to that of "HistoND", so please read the "HistoND" docstring. The only difference is that a collectuon of "DualHistoAxis" objects must be provided to specify the binning instead of a collection of "HistoAxis" objects. To build a "DualHistoAxis", you can use either "HistoAxis" signature (specify the number of bins, left edge, and right edge) or the "NUHistoAxis" signature (specify a complete set of bin edges). Here are some examples: from npstat import DualHistoAxis axis1 = DualHistoAxis((0., 1., 2., 4., 8., 16.), "X") # for non-uniform binning axis2 = DualHistoAxis(100, -1.0, 2.0, "Y") # for uniform binning Although possible, it does not make much sense to build "DAHistoND" using "DoubleBoxND" which specifies its boundaries -- uniform binning will be created in this case, and "HistoND" is more efficient in handling uniform bins. 
""" funcs = {'uint8' : npstat.UCharDAHistoND, 'UChar' : npstat.UCharDAHistoND, 'unsigned char' : npstat.UCharDAHistoND, 'int32' : npstat.IntDAHistoND, 'Int' : npstat.IntDAHistoND, 'int' : npstat.IntDAHistoND, 'int64' : npstat.LLongDAHistoND, 'LLong' : npstat.LLongDAHistoND, 'long long' : npstat.LLongDAHistoND, 'long long int' : npstat.LLongDAHistoND, 'float32' : npstat.FloatDAHistoND, 'Float' : npstat.FloatDAHistoND, 'float' : npstat.FloatDAHistoND, 'float64' : npstat.DoubleDAHistoND, 'Double' : npstat.DoubleDAHistoND, 'double' : npstat.DoubleDAHistoND, 'StatAccumulator' : npstat.StatAccDAHistoND, 'npstat::StatAccumulator' : npstat.StatAccDAHistoND, 'StatAcc' : npstat.StatAccDAHistoND, 'WeightedStatAccumulator' : npstat.WStatAccDAHistoND, 'npstat::WeightedStatAccumulator' : npstat.WStatAccDAHistoND, 'WStatAcc' : npstat.WStatAccDAHistoND, 'BinSummary' : npstat.BinSummaryDAHistoND, 'npstat::BinSummary' : npstat.BinSummaryDAHistoND, 'FloatSampleAccumulator' : npstat.FSampleAccDAHistoND, 'npstat::FloatSampleAccumulator' : npstat.FSampleAccDAHistoND, 'FSampleAcc' : npstat.FSampleAccDAHistoND, 'DoubleSampleAccumulator' : npstat.DSampleAccDAHistoND, 'npstat::DoubleSampleAccumulator' : npstat.DSampleAccDAHistoND, 'DSampleAcc' : npstat.DSampleAccDAHistoND, 'DoubleWeightedSampleAccumulator' : npstat.DWSampleAccDAHistoND, 'npstat::DoubleWeightedSampleAccumulator' : npstat.DWSampleAccDAHistoND, 'DWSampleAcc' : npstat.DWSampleAccDAHistoND} return funcs[typename](*args) def BandwidthCVLeastSquares1D(typename): """ Least squares cross-validation helper class """ funcs = {'unsigned char' : npstat.UCharBandwidthCVLeastSquares1D, 'int' : npstat.IntBandwidthCVLeastSquares1D, 'long long' : npstat.LLongBandwidthCVLeastSquares1D, 'float' : npstat.FloatBandwidthCVLeastSquares1D, 'double' : npstat.DoubleBandwidthCVLeastSquares1D} return funcs[typename]() def BandwidthCVPseudoLogli1D(typename, *args): """ Pseudo-likelihood cross-validation helper class """ funcs = {'unsigned char' : npstat.UCharBandwidthCVPseudoLogli1D, 'int' : npstat.IntBandwidthCVPseudoLogli1D, 'long long' : npstat.LLongBandwidthCVPseudoLogli1D, 'float' : npstat.FloatBandwidthCVPseudoLogli1D, 'double' : npstat.DoubleBandwidthCVPseudoLogli1D} return funcs[typename](*args) def histoBinType(h): lookup = {'UCharHistoND' : 'unsigned char', 'IntHistoND' : 'int', 'LLongHistoND' : 'long long', 'FloatHistoND' : 'float', 'DoubleHistoND' : 'double', 'StatAccHistoND' : 'npstat::StatAccumulator', 'WStatAccHistoND' : 'npstat::WeightedStatAccumulator', 'BinSummaryHistoND' : 'npstat::BinSummary', 'FSampleAccHistoND' : 'npstat::FloatSampleAccumulator', 'DSampleAccHistoND' : 'npstat::DoubleSampleAccumulator', 'DWSampleAccHistoND' : 'npstat::DoubleWeightedSampleAccumulator', # 'UCharNUHistoND' : 'unsigned char', 'IntNUHistoND' : 'int', 'LLongNUHistoND' : 'long long', 'FloatNUHistoND' : 'float', 'DoubleNUHistoND' : 'double', 'StatAccNUHistoND' : 'npstat::StatAccumulator', 'WStatAccNUHistoND' : 'npstat::WeightedStatAccumulator', 'BinSummaryNUHistoND' : 'npstat::BinSummary', 'FSampleAccNUHistoND' : 'npstat::FloatSampleAccumulator', 'DSampleAccNUHistoND' : 'npstat::DoubleSampleAccumulator', 'DWSampleAccNUHistoND' : 'npstat::DoubleWeightedSampleAccumulator', # 'UCharDAHistoND' : 'unsigned char', 'IntDAHistoND' : 'int', 'LLongDAHistoND' : 'long long', 'FloatDAHistoND' : 'float', 'DoubleDAHistoND' : 'double', 'StatAccDAHistoND' : 'npstat::StatAccumulator', 'WStatAccDAHistoND' : 'npstat::WeightedStatAccumulator', 'BinSummaryDAHistoND' : 'npstat::BinSummary', 
def histoBinType(h):
    lookup = {'UCharHistoND'       : 'unsigned char',
              'IntHistoND'         : 'int',
              'LLongHistoND'       : 'long long',
              'FloatHistoND'       : 'float',
              'DoubleHistoND'      : 'double',
              'StatAccHistoND'     : 'npstat::StatAccumulator',
              'WStatAccHistoND'    : 'npstat::WeightedStatAccumulator',
              'BinSummaryHistoND'  : 'npstat::BinSummary',
              'FSampleAccHistoND'  : 'npstat::FloatSampleAccumulator',
              'DSampleAccHistoND'  : 'npstat::DoubleSampleAccumulator',
              'DWSampleAccHistoND' : 'npstat::DoubleWeightedSampleAccumulator',
              #
              'UCharNUHistoND'       : 'unsigned char',
              'IntNUHistoND'         : 'int',
              'LLongNUHistoND'       : 'long long',
              'FloatNUHistoND'       : 'float',
              'DoubleNUHistoND'      : 'double',
              'StatAccNUHistoND'     : 'npstat::StatAccumulator',
              'WStatAccNUHistoND'    : 'npstat::WeightedStatAccumulator',
              'BinSummaryNUHistoND'  : 'npstat::BinSummary',
              'FSampleAccNUHistoND'  : 'npstat::FloatSampleAccumulator',
              'DSampleAccNUHistoND'  : 'npstat::DoubleSampleAccumulator',
              'DWSampleAccNUHistoND' : 'npstat::DoubleWeightedSampleAccumulator',
              #
              'UCharDAHistoND'       : 'unsigned char',
              'IntDAHistoND'         : 'int',
              'LLongDAHistoND'       : 'long long',
              'FloatDAHistoND'       : 'float',
              'DoubleDAHistoND'      : 'double',
              'StatAccDAHistoND'     : 'npstat::StatAccumulator',
              'WStatAccDAHistoND'    : 'npstat::WeightedStatAccumulator',
              'BinSummaryDAHistoND'  : 'npstat::BinSummary',
              'FSampleAccDAHistoND'  : 'npstat::FloatSampleAccumulator',
              'DSampleAccDAHistoND'  : 'npstat::DoubleSampleAccumulator',
              'DWSampleAccDAHistoND' : 'npstat::DoubleWeightedSampleAccumulator'}
    return lookup[type(h).__name__]

def InMemoryNtuple(typename, columns, title=None):
    """
    This function creates "memory-resident" ntuples with named columns.
    All columns will have the same type.
    """
    funcs = {'int32' : npstat.IntInMemoryNtuple, 'Int' : npstat.IntInMemoryNtuple,
             'int' : npstat.IntInMemoryNtuple,
             'UChar' : npstat.UCharInMemoryNtuple, 'uint8' : npstat.UCharInMemoryNtuple,
             'unsigned char' : npstat.UCharInMemoryNtuple,
             'int64' : npstat.LLongInMemoryNtuple, 'LLong' : npstat.LLongInMemoryNtuple,
             'long long' : npstat.LLongInMemoryNtuple,
             'long long int' : npstat.LLongInMemoryNtuple,
             'float32' : npstat.FloatInMemoryNtuple, 'Float' : npstat.FloatInMemoryNtuple,
             'float' : npstat.FloatInMemoryNtuple,
             'float64' : npstat.DoubleInMemoryNtuple, 'Double' : npstat.DoubleInMemoryNtuple,
             'double' : npstat.DoubleInMemoryNtuple}
    cols = npstat.StringVector()
    cols.reserve(len(columns))
    for name in columns:
        cols.push_back(name)
    if title is None:
        nt = funcs[typename](cols)
    else:
        nt = funcs[typename](cols, title)
    return nt

def ArchivedNtuple(typename, columns, title, archive,
                   itemNameInArchive, categoryInArchive,
                   rowsPerBuffer=1000, writeColumnWise=False):
    """
    This function creates "archive-resident" ntuples with named columns.
    All columns will have the same type. With file-based archives, these
    ntuples can grow as large as the available disk space.
    """
    funcs = {'uint8' : npstat.UCharArchivedNtuple, 'UChar' : npstat.UCharArchivedNtuple,
             'unsigned char' : npstat.UCharArchivedNtuple,
             'int32' : npstat.IntArchivedNtuple, 'Int' : npstat.IntArchivedNtuple,
             'int' : npstat.IntArchivedNtuple,
             'int64' : npstat.LLongArchivedNtuple, 'LLong' : npstat.LLongArchivedNtuple,
             'long long' : npstat.LLongArchivedNtuple,
             'long long int' : npstat.LLongArchivedNtuple,
             'float32' : npstat.FloatArchivedNtuple, 'Float' : npstat.FloatArchivedNtuple,
             'float' : npstat.FloatArchivedNtuple,
             'float64' : npstat.DoubleArchivedNtuple, 'Double' : npstat.DoubleArchivedNtuple,
             'double' : npstat.DoubleArchivedNtuple}
    cols = npstat.StringVector()
    cols.reserve(len(columns))
    for name in columns:
        cols.push_back(name)
    nt = funcs[typename](cols, title, archive, itemNameInArchive,
                         categoryInArchive, rowsPerBuffer, writeColumnWise)
    _registerArchivedNtuple(nt.classId().name(), nt.objectNumber(), archive)
    return nt

# Registry of ArchivedNtuple objects is needed in order to prevent garbage
# collection of archives before garbage collection of ntuples themselves
__ArchivedNtupleRegistry = dict()

def _registerArchivedNtuple(classId, number, archive):
    key = "%s_(%u)" % (classId, number)
    __ArchivedNtupleRegistry[key] = archive

def _unregisterArchivedNtuple(classId, number):
    key = "%s_(%u)" % (classId, number)
    if key in __ArchivedNtupleRegistry:
        del __ArchivedNtupleRegistry[key]
_ValueRecordFuncs = {
    'bool' : npstat.NPValueRecord_Bool, 'Bool' : npstat.NPValueRecord_Bool,
    'char' : npstat.NPValueRecord_Char, 'Char' : npstat.NPValueRecord_Char,
    'unsigned char' : npstat.NPValueRecord_UChar, 'uint8' : npstat.NPValueRecord_UChar,
    'UChar' : npstat.NPValueRecord_UChar,
    'signed char' : npstat.NPValueRecord_SChar, 'int8' : npstat.NPValueRecord_SChar,
    'SChar' : npstat.NPValueRecord_SChar,
    'short' : npstat.NPValueRecord_Short, 'int16' : npstat.NPValueRecord_Short,
    'short int' : npstat.NPValueRecord_Short, 'Short' : npstat.NPValueRecord_Short,
    'unsigned short' : npstat.NPValueRecord_UShort, 'uint16' : npstat.NPValueRecord_UShort,
    'unsigned short int' : npstat.NPValueRecord_UShort, 'UShort' : npstat.NPValueRecord_UShort,
    'int' : npstat.NPValueRecord_Int, 'int32' : npstat.NPValueRecord_Int,
    'Int' : npstat.NPValueRecord_Int,
    'long' : npstat.NPValueRecord_Long, 'long int' : npstat.NPValueRecord_Long,
    'Long' : npstat.NPValueRecord_Long,
    'long long' : npstat.NPValueRecord_LLong, 'int64' : npstat.NPValueRecord_LLong,
    'long long int' : npstat.NPValueRecord_LLong, 'LLong' : npstat.NPValueRecord_LLong,
    'unsigned' : npstat.NPValueRecord_UInt, 'uint32' : npstat.NPValueRecord_UInt,
    'unsigned int' : npstat.NPValueRecord_UInt, 'UInt' : npstat.NPValueRecord_UInt,
    'unsigned long' : npstat.NPValueRecord_ULong,
    'unsigned long int' : npstat.NPValueRecord_ULong,
    'ULong' : npstat.NPValueRecord_ULong,
    'unsigned long long' : npstat.NPValueRecord_ULLong,
    'uint64' : npstat.NPValueRecord_ULLong,
    'unsigned long long int' : npstat.NPValueRecord_ULLong,
    'ULLong' : npstat.NPValueRecord_ULLong,
    'float' : npstat.NPValueRecord_Float, 'float32' : npstat.NPValueRecord_Float,
    'Float' : npstat.NPValueRecord_Float,
    'double' : npstat.NPValueRecord_Double, 'float64' : npstat.NPValueRecord_Double,
    'Double' : npstat.NPValueRecord_Double,
    'long double' : npstat.NPValueRecord_LDouble, 'LDouble' : npstat.NPValueRecord_LDouble,
    'std::complex<float>' : npstat.NPValueRecord_CFloat,
    'complex<float>' : npstat.NPValueRecord_CFloat,
    'complex64' : npstat.NPValueRecord_CFloat, 'CFloat' : npstat.NPValueRecord_CFloat,
    'std::complex<double>' : npstat.NPValueRecord_CDouble,
    'complex<double>' : npstat.NPValueRecord_CDouble,
    'complex' : npstat.NPValueRecord_CDouble, 'complex128' : npstat.NPValueRecord_CDouble,
    'CDouble' : npstat.NPValueRecord_CDouble,
    'std::complex<long double>' : npstat.NPValueRecord_CLDouble,
    'complex<long double>' : npstat.NPValueRecord_CLDouble,
    'CLDouble' : npstat.NPValueRecord_CLDouble,
    'string' : npstat.NPValueRecord_String, 'String' : npstat.NPValueRecord_String,
    'std::string' : npstat.NPValueRecord_String,
    'SCharVector' : npstat.NPValueRecord_SCharVector,
    'vector<signed char>' : npstat.NPValueRecord_SCharVector,
    'UCharVector' : npstat.NPValueRecord_UCharVector,
    'vector<unsigned char>' : npstat.NPValueRecord_UCharVector,
    'ShortVector' : npstat.NPValueRecord_ShortVector,
    'vector<short>' : npstat.NPValueRecord_ShortVector,
    'UShortVector' : npstat.NPValueRecord_UShortVector,
    'vector<unsigned short>' : npstat.NPValueRecord_UShortVector,
    'LongVector' : npstat.NPValueRecord_LongVector,
    'vector<long>' : npstat.NPValueRecord_LongVector,
    'ULongVector' : npstat.NPValueRecord_ULongVector,
    'vector<unsigned long>' : npstat.NPValueRecord_ULongVector,
    'IntVector' : npstat.NPValueRecord_IntVector,
    'vector<int>' : npstat.NPValueRecord_IntVector,
    'LLongVector' : npstat.NPValueRecord_LLongVector,
    'vector<long long>' : npstat.NPValueRecord_LLongVector,
    'UIntVector' : npstat.NPValueRecord_UIntVector,
    'vector<unsigned int>' : npstat.NPValueRecord_UIntVector,
    'ULLongVector' : npstat.NPValueRecord_ULLongVector,
    'vector<unsigned long long>' : npstat.NPValueRecord_ULLongVector,
    'FloatVector' : npstat.NPValueRecord_FloatVector,
    'vector<float>' : npstat.NPValueRecord_FloatVector,
    'DoubleVector' : npstat.NPValueRecord_DoubleVector,
    'vector<double>' : npstat.NPValueRecord_DoubleVector,
    'LDoubleVector' : npstat.NPValueRecord_LDoubleVector,
    'vector<long double>' : npstat.NPValueRecord_LDoubleVector,
    'CFloatVector' : npstat.NPValueRecord_CFloatVector,
    'vector<complex<float> >' : npstat.NPValueRecord_CFloatVector,
    'vector<complex<float>>' : npstat.NPValueRecord_CFloatVector,
    'CDoubleVector' : npstat.NPValueRecord_CDoubleVector,
    'vector<complex<double> >' : npstat.NPValueRecord_CDoubleVector,
    'vector<complex<double>>' : npstat.NPValueRecord_CDoubleVector,
    'CLDoubleVector' : npstat.NPValueRecord_CLDoubleVector,
    'vector<complex<long double> >' : npstat.NPValueRecord_CLDoubleVector,
    'vector<complex<long double>>' : npstat.NPValueRecord_CLDoubleVector,
    'StringVector' : npstat.NPValueRecord_StringVector,
    'vector<string>' : npstat.NPValueRecord_StringVector,
    'NoneType' : npstat.NPValueRecord_StorableNone,
    'npstat::StorableNone' : npstat.NPValueRecord_StorableNone}
def CPPRecord(typename, value, *args):
    """
    Type-disambiguated archive record. This record can be used to
    auto-convert certain simple Python objects and sequences to C++ types
    for storing in "geners" archives. Stored in this manner, these objects
    can be subsequently accessed from pure C++ programs.

    CPPRecord will attempt to perform the conversion of a python object
    or sequence according to the type name provided as the first argument.
    The types can be named according to any one of three different naming
    conventions. I suggest using one of these conventions consistently
    throughout your application code:

    Numpy convention   Npstat convention   C++ convention
    ----------------   -----------------   -----------------------
    bool               Bool                bool
    char               Char                char
    int8               SChar               signed char
    uint8              UChar               unsigned char
    int16              Short               short
    uint16             UShort              unsigned short
    int32              Int                 int
    uint32             UInt                unsigned
    N/A                Long                long
    N/A                ULong               unsigned long
    int64              LLong               long long
    uint64             ULLong              unsigned long long
    float32            Float               float
    float64            Double              double
    N/A                LDouble             long double
    complex64          CFloat              complex<float>
    complex128         CDouble             complex<double>
    N/A                CLDouble            complex<long double>
    N/A                String              string
    N/A                SCharVector         vector<signed char>
    N/A                UCharVector         vector<unsigned char>
    N/A                ShortVector         vector<short>
    N/A                UShortVector        vector<unsigned short>
    N/A                LongVector          vector<long>
    N/A                ULongVector         vector<unsigned long>
    N/A                IntVector           vector<int>
    N/A                LLongVector         vector<long long>
    N/A                UIntVector          vector<unsigned int>
    N/A                ULLongVector        vector<unsigned long long>
    N/A                FloatVector         vector<float>
    N/A                DoubleVector        vector<double>
    N/A                LDoubleVector       vector<long double>
    N/A                CFloatVector        vector<complex<float> >
    N/A                CDoubleVector       vector<complex<double> >
    N/A                CLDoubleVector      vector<complex<long double> >
    N/A                StringVector        vector<string>
    N/A                NoneType            npstat::StorableNone

    Note that, in numpy, "float" is a shorthand for "float64". This is not
    the case here: "float" will be interpreted according to the C++
    convention.

    The second CPPRecord argument is the object to convert and store. The
    third and the fourth arguments are, respectively, the name and the
    category of the item in the archive.

    If automatic conversion of the Python object to the requested C++ type
    is impossible, an exception will be raised.

    Here is a simple example which stores a Python float as a C++ double.
    It assumes that there is a variable "ar" which refers to a "geners"
    archive.

    from npstat_utils import CPPRecord

    number = 1.2345
    ar.store(CPPRecord('double', number, "Name for the archive", "Category"))
    """
    if value is None:
        if typename == 'NoneType' or typename == 'npstat::StorableNone':
            return _ValueRecordFuncs[typename](_StorableNone, *args)
        else:
            raise ValueError("ValueRecord can not store None as %s" % typename)
    else:
        return _ValueRecordFuncs[typename](value, *args)

def Record(value, *args):
    if value is None:
        return npstat.NPRecord(_StorableNone, *args)
    else:
        return npstat.NPRecord(value, *args)
class Reference(object):
    "Universal archive reference"

    __typemap = dict()

    # Initialize __typemap variable. This variable maps classes stored
    # in "geners" archives into reference classes used for their retrieval.
    #
    # The code below assumes that every class whose name starts with letters
    # "Ref_" and which is not a secondary SWIG wrapper class is a reference
    # class which can be used to retrieve items from "geners" archives.
    # This assumption relies upon a naming convention used by the "npstat"
    # package wrapping code. This convention itself is just a convenient
    # coding practice, it can not be enforced by the language.
    #
    arx = npstat.StringArchive()
    for name in dir(npstat):
        if name[:4] == 'Ref_' and name[-12:] != 'swigregister':
            fcn = getattr(npstat, name)
            ref = fcn(arx, 1)
            __typemap[ref.type().name()] = fcn
    del arx, name, fcn, ref

    @classmethod
    def addType(cls, typename, fcn):
        cls.__typemap[typename] = fcn

    def __init__(self, ar, nameOrId, category=None):
        self.archive = ar
        self.nameOrId = nameOrId
        self.categoryPattern = category
        if category is None:
            # Assume that constructor got the item id
            self._idlist = list()
            if ar.itemExists(nameOrId):
                self._idlist.append(nameOrId)
        else:
            # Assume that constructor got name and category
            self._idlist = ar.findItems(nameOrId, category)

    def size(self):
        return len(self._idlist)

    def unique(self):
        return len(self._idlist) == 1

    def empty(self):
        return len(self._idlist) == 0

    def id(self, index):
        return self._idlist[index]

    def idList(self):
        return list(self._idlist)

    def indexedCatalogEntry(self, index):
        itemid = self.id(index)
        return self.archive.getCatalogEntry(itemid)

    def allCatalogEntries(self):
        return [self.archive.getCatalogEntry(itemid) for itemid in self._idlist]

    def retrieve(self, index):
        itemid = self.id(index)
        entry = self.archive.getCatalogEntry(itemid)
        type = entry.type().name()
        if type == 'gs::StringArchive':
            result = npstat.loadStringArchiveFromArchive(self.archive, itemid)
        else:
            referenceFcn = Reference.__typemap[type]
            result = referenceFcn(self.archive, itemid).retrieve(0)
        if type == 'NoneType':
            result = None
        return result

    def getValue(self, index):
        itemid = self.id(index)
        entry = self.archive.getCatalogEntry(itemid)
        type = entry.type().name()
        referenceFcn = Reference.__typemap[type]
        result = referenceFcn(self.archive, itemid).getValue(0)
        if type == 'NoneType':
            result = None
        return result

    def restore(self, index, obj):
        itemid = self.id(index)
        entry = self.archive.getCatalogEntry(itemid)
        type = entry.type().name()
        referenceFcn = Reference.__typemap[type]
        if type == 'NoneType':
            if obj is None:
                referenceFcn(self.archive, itemid).retrieve(0)
                return
            else:
                raise TypeError('Can not use "restore" method with NoneType records')
        if obj is None:
            raise ValueError("Can not overwrite the immutable object None")
        referenceFcn(self.archive, itemid).restore(0, obj)
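# Reference usage sketch (editorial illustration only), assuming "ar" is a
# "geners" archive containing a unique item named "my histo" in category
# "Histograms":
#
#   ref = Reference(ar, "my histo", "Histograms")
#   assert ref.unique()
#   histo = ref.retrieve(0)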
class _NtHistoFillHelper:
    _typemap = dict()
    for name in dir(npstat):
        if name[-11:] == 'NtHistoFill':
            callsFillC = getattr(npstat, "%s_callsFillC" % name)
            histoClassname = getattr(npstat, "%s_histoClassname" % name)
            key = histoClassname(), callsFillC()
            _typemap[key] = getattr(npstat, name)
    del name, callsFillC, histoClassname, key

def NtHistoFill(histo, columns, weightColumn=None, useFillC=False):
    histoClassname = histo.classId().name()
    key = histoClassname, useFillC
    try:
        cl = _NtHistoFillHelper._typemap[key]
    except KeyError:
        raise TypeError('NtHistoFill with useFillC=%d is not supported for ' \
                        'histograms of type "%s"' % (useFillC, histoClassname))
    if weightColumn is None:
        return cl(histo, columns)
    else:
        return cl(histo, columns, weightColumn)

def NtRectangularCut(typename, *args):
    funcs = {'uint8' : npstat.UCharRectangularCut,
             'UChar' : npstat.UCharRectangularCut,
             'unsigned char' : npstat.UCharRectangularCut,
             'int32' : npstat.IntNtRectangularCut, 'Int' : npstat.IntNtRectangularCut,
             'int' : npstat.IntNtRectangularCut,
             'int64' : npstat.LLongNtRectangularCut, 'LLong' : npstat.LLongNtRectangularCut,
             'long long' : npstat.LLongNtRectangularCut,
             'long long int' : npstat.LLongNtRectangularCut,
             'float32' : npstat.FloatNtRectangularCut, 'Float' : npstat.FloatNtRectangularCut,
             'float' : npstat.FloatNtRectangularCut,
             'float64' : npstat.DoubleNtRectangularCut, 'Double' : npstat.DoubleNtRectangularCut,
             'double' : npstat.DoubleNtRectangularCut}
    return funcs[typename](*args)

def templateParameter(classid, index=0):
    """
    Returns the class name of a given template parameter in a given class
    id. Parameter index should be between 0 (included, also default) and
    the number of template parameters in the class (excluded). TypeError
    will be raised if the argument class id does not correspond to a
    template.
    """
    tparams = npstat.templateParameters(classid)
    npars = len(tparams)
    if index >= 0 and index < npars:
        return tparams[index]
    else:
        if npars > 0:
            raise IndexError('template parameter index %d is out of range ' \
                             'for class %s' % (index, classid.name()))
        else:
            raise TypeError('class %s is not a template' % classid.name())

class Config(object):
    """
    Class to create/store configuration information for various programs.
    The information will be stored in a StringArchive object which can
    also be automatically written to file. Arbitrary objects can be stored
    as configuration data, while the configuration names will be converted
    to strings. If all of your configuration data items have string
    representations, consider using the ConfigParser class instead.
    """
    def __init__(self):
        self._data = dict()

    def set(self, name, value):
        "Set a configuration parameter"
        self._data[str(name)] = value

    def get(self, name, defaultValue=None, noneIsDefault=False):
        """
        Retrieve a configuration parameter value. Typical usage:

        config.get("some name") -- get the value for "some name". Raise
            KeyError exception in case "some name" was not configured.

        config.get("some name", defaultValue) -- get the value for
            "some name". Return defaultValue in case "some name" was not
            configured.

        config.get("some name", None, True) -- use this form in case None
            is the desired default value which should be returned in case
            "some name" was not configured.
        """
        key = str(name)
        if defaultValue is None and (not noneIsDefault):
            return self._data[key]
        else:
            return self._data.get(key, defaultValue)

    def store(self, ar, category, checkUniqueness=True):
        "Store configuration in an archive. Use the category given."
        if len(self._data) == 0:
            return
        cat = str(category)
        if checkUniqueness:
            nameSpec = npstat.SearchSpecifier(".*", True)
            catSpec = npstat.SearchSpecifier(cat, False)
            ref = Reference(ar, nameSpec, catSpec)
            if not ref.empty():
                raise ValueError('Category "%s" is not empty' % cat)
        for key, value in self._data.items():
            ar.store(Record(value, key, cat))

    def retrieve(self, ar, category, clearExistingData=False):
        "Retrieve configuration from an archive"
        if clearExistingData:
            self._data.clear()
        nameSpec = npstat.SearchSpecifier(".*", True)
        catSpec = npstat.SearchSpecifier(str(category), False)
        ref = Reference(ar, nameSpec, catSpec)
        for i in range(ref.size()):
            entry = ref.indexedCatalogEntry(i)
            self._data[entry.name()] = ref.retrieve(i)

    def write(self, filename):
        """
        Simplified top-level driver: write configuration into a file
        using a stand-alone string archive
        """
        ar = npstat.StringArchive("Config")
        self.store(ar, "", False)
        if not npstat.writeCompressedStringArchive(ar, filename):
            raise IOError('Failed to write file "%s"' % filename)

    def read(self, filename, clearExistingData=False):
        """
        Simplified top-level driver: read configuration from a file
        using a stand-alone string archive
        """
        ar = npstat.readCompressedStringArchive(filename)
        self.retrieve(ar, "", clearExistingData)

    def __str__(self):
        rep = ""
        count = 0
        for key, value in self._data.items():
            if count:
                rep += "\n"
            rep += "%s : " % key
            try:
                rep += str(value)
            except:
                rep += "(no string representation)"
            count += 1
        return rep
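# Config usage sketch (editorial illustration only; the file name is
# hypothetical). Writing and reading a small configuration through a
# compressed string archive file:
#
#   config = Config()
#   config.set("bandwidth", 0.25)
#   config.set("kernel", "gaussian")
#   config.write("example_config.gssa")
#
#   restored = Config()
#   restored.read("example_config.gssa")
#   assert restored.get("bandwidth") == 0.25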
class ConfigParser(configparser.SafeConfigParser):
    """
    A simple adaptation of configparser.SafeConfigParser which allows
    for default values for each of the "get" calls
    """
    def __init__(self):
        configparser.SafeConfigParser.__init__(self, allow_no_value=True)
        self._typemap1 = {str : self.getstr, 'str' : self.getstr,
                          eval : self.geteval, 'eval' : self.geteval,
                          float : self.getfloat, 'float' : self.getfloat,
                          int : self.getint, 'int' : self.getint,
                          bool : self.getboolean, 'bool' : self.getboolean}
        self._typemap2 = {str : self.get, 'str' : self.get,
                          eval : self.getevalOrNone, 'eval' : self.getevalOrNone,
                          float : self.getfloatOrNone, 'float' : self.getfloatOrNone,
                          int : self.getintOrNone, 'int' : self.getintOrNone,
                          bool : self.getbooleanOrNone, 'bool' : self.getbooleanOrNone}

    def getstr(self, section, option, default=Exception):
        "Get a string parameter"
        try:
            value = configparser.SafeConfigParser.get(self, section, option)
        except (NoSectionError, NoOptionError):
            if default is Exception:
                raise
            if default is None:
                value = None
            else:
                value = str(default)
        if value is None:
            raise ConfigError('NoneType is not allowed for option ' \
                              '"%s" in section "%s"' % (option, section))
        return value

    def get(self, section, option, default=Exception):
        "Get a string parameter or None"
        try:
            value = configparser.SafeConfigParser.get(self, section, option)
        except (NoSectionError, NoOptionError):
            if default is Exception:
                raise
            if default is None:
                value = None
            else:
                value = str(default)
        return value

    def getint(self, section, option, default=Exception):
        "Get an integer parameter"
        try:
            value = configparser.SafeConfigParser.getint(self, section, option)
        except (NoSectionError, NoOptionError):
            if default is Exception:
                raise
            value = int(default)
        return value

    def getintOrNone(self, section, option, default=Exception):
        "Get an integer parameter or None"
        try:
            value = configparser.SafeConfigParser.getint(self, section, option)
        except (NoSectionError, NoOptionError):
            if default is Exception:
                raise
            if default is None:
                value = None
            else:
                value = int(default)
        except:
            value = configparser.SafeConfigParser.get(self, section, option)
            if value is not None:
                raise ConfigError('Expected an integer or None for option ' \
                                  '"%s" in section "%s", got "%s"' %
                                  (option, section, value))
        return value

    def getfloat(self, section, option, default=Exception):
        "Get a float parameter"
        try:
            value = configparser.SafeConfigParser.getfloat(self, section, option)
        except (NoSectionError, NoOptionError):
            if default is Exception:
                raise
            value = float(default)
        return value

    def getfloatOrNone(self, section, option, default=Exception):
        "Get a float parameter or None"
        try:
            value = configparser.SafeConfigParser.getfloat(self, section, option)
        except (NoSectionError, NoOptionError):
            if default is Exception:
                raise
            if default is None:
                value = None
            else:
                value = float(default)
        except:
            value = configparser.SafeConfigParser.get(self, section, option)
            if value is not None:
                raise ConfigError('Expected a float or None for option ' \
                                  '"%s" in section "%s", got "%s"' %
                                  (option, section, value))
        return value

    def getboolean(self, section, option, default=Exception):
        "Get a boolean parameter"
        try:
            value = configparser.SafeConfigParser.getboolean(self, section, option)
        except (NoSectionError, NoOptionError):
            if default is Exception:
                raise
            value = bool(default)
        return value

    def getbooleanOrNone(self, section, option, default=Exception):
        "Get a boolean parameter or None"
        try:
            value = configparser.SafeConfigParser.getboolean(self, section, option)
        except (NoSectionError, NoOptionError):
            if default is Exception:
                raise
            if default is None:
                value = None
            else:
                value = bool(default)
        except:
            value = configparser.SafeConfigParser.get(self, section, option)
            if value is not None:
                raise ConfigError('Expected a boolean or None for option ' \
                                  '"%s" in section "%s", got "%s"' %
                                  (option, section, value))
        return value

    def geteval(self, section, option, default=Exception):
        "Process parameter string with ast.literal_eval"
        value = self.getstr(section, option, default)
        return ast.literal_eval(value)

    def getevalOrNone(self, section, option, default=Exception):
        "Process parameter string with ast.literal_eval or return None"
        value = self.get(section, option, default)
        if value is None:
            return None
        else:
            return ast.literal_eval(value)

    def getbytype(self, type, section, option, default=Exception):
        "Get a parameter with the given type"
        try:
            value = self._typemap1[type](section, option, default)
        except KeyError:
            try:
                msg = "Don't know how to handle \"%s\"" % type
            except:
                msg = "Unknown type without string representation"
            raise TypeError(msg)
        return value

    def getbytypeOrNone(self, type, section, option, default=Exception):
        "Get a parameter with the given type or None"
        try:
            value = self._typemap2[type](section, option, default)
        except KeyError:
            try:
                msg = "Don't know how to handle \"%s\"" % type
            except:
                msg = "Unknown type without string representation"
            raise TypeError(msg)
        return value

    def addkwarg(self, type, section, option, inputDict):
        """
        This function will add the option to the input dictionary if the
        option is specified in the configuration. None is not allowed as
        the option value.
        """
        if configparser.SafeConfigParser.has_option(self, section, option):
            value = self.getbytype(type, section, option)
            inputDict[option] = value

    def addkwargOrNone(self, type, section, option, inputDict):
        """
        This function will add the option to the input dictionary if the
        option is specified in the configuration. None is allowed as the
        option value.
        """
        if configparser.SafeConfigParser.has_option(self, section, option):
            value = self.getbytypeOrNone(type, section, option)
            inputDict[option] = value
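# ConfigParser usage sketch (editorial illustration only), assuming a file
# "job.ini" with a [kde] section:
#
#   parser = ConfigParser()
#   parser.read("job.ini")                              # standard configparser I/O
#   degree = parser.getint("kde", "filterDegree", 2)    # falls back to 2
#   power = parser.getfloatOrNone("kde", "symbetaPower", None)
#   kwargs = dict()
#   parser.addkwarg(float, "kde", "bandwidth", kwargs)  # added only if present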
class ConfigError(ValueError):
    """
    This exception is raised if an invalid option value is specified
    in a configuration file (e.g., plotting style file)
    """
    def __init__(self, *args):
        ValueError.__init__(self, *args)
""" if configparser.SafeConfigParser.has_option(self, section, option): value = self.getbytypeOrNone(type, section, option) inputDict[option] = value class ConfigError(ValueError): """ This exception is raised if an invalid option value is specified in a configuration file (e.g., plotting style file) """ def __init__(self, *args): ValueError.__init__(self, *args) +class _BinnedCVCalc1D: + def __init__(self, effectiveSampleSize, weightedSample): + self._effectiveSampleSize = effectiveSampleSize + self._weightedSample = weightedSample + + def __call__(self, h, result, filter): + calc = self._makeCalc(h) + if self._weightedSample is not None: + return calc.cvWeightedSample(h, self._weightedSample, result, filter) + elif self._effectiveSampleSize is not None: + return calc.cvWeighted(h, self._effectiveSampleSize, result, filter) + else: + return calc.cv(h, result, filter) + + def _makeCalc(self, h): + raise NotImplementedError("This method is not implemented") + + +class BinnedRLCVCalc1D(_BinnedCVCalc1D): + def __init__(self, regParameter, effectiveSampleSize=None, weightedSample=None): + assert regParameter >= 0.0, "RLCV regularisation parameter can not be negative" + _BinnedCVCalc1D.__init__(self, effectiveSampleSize, weightedSample) + self._regParameter = regParameter + + def _makeCalc(self, h): + return BandwidthCVPseudoLogli1D(histoBinType(h), self._regParameter) + + +class BinnedLSCVCalc1D(_BinnedCVCalc1D): + def __init__(self, effectiveSampleSize=None, weightedSample=None): + _BinnedCVCalc1D.__init__(self, effectiveSampleSize, weightedSample) + + def _makeCalc(self, h): + return BandwidthCVLeastSquares1D(histoBinType(h)) + + class FFTKDE1D: def __init__(self, prototypeHisto, allowNegativeResults=True, useMirroring=True): if prototypeHisto.dim() != 1: raise ValueError("Expected 1-d histogram as a prototype") if not prototypeHisto.isUniformlyBinned(): raise ValueError("Expected uniformy binned histogram as a prototype") axis = prototypeHisto.axis(0) self._xmin = axis.min() self._xmax = axis.max() self._nBins = axis.nBins() self._binWidth = axis.binWidth(0) self._engine = npstat.ConvolutionEngine1D(2*self._nBins) self._bh = npstat.BoundaryHandling("BM_TRUNCATE") self._allowNegative = allowNegativeResults self._mirror = useMirroring self._kernelTable = dict() self._kernelProperties = list() self._directId = 2147483647 self._directKernelKey = (0.0, 0.0, -1.0) self._gaussKernelNSigmas = 12.0 def allowNegativeEstimates(self, b): self._allowNegative = b def isCompatible(self, h): if h.dim() != 1: return False if not h.isUniformlyBinned(): return False axis = h.axis(0) return self._xmin == axis.min() and \ self._xmax == axis.max() and \ self._nBins == axis.nBins() def makeKernel(self, symbetaPower, filterDegree, bandwidth): assert filterDegree >= 0.0, "Filter degree can not be negative" assert bandwidth > 0.0, "Kernel bandwidth must be positive" if symbetaPower < 0.0: distro = npstat.TruncatedGauss1D(0.0, bandwidth, self._gaussKernelNSigmas) else: distro = npstat.SymmetricBeta1D(0.0, bandwidth, symbetaPower) builder = npstat.getBoundaryFilter1DBuilder(self._bh, distro, self._binWidth) taper = npstat.contDegreeTaper(filterDegree) kernelCenter = self._nBins - 1 pf = builder.makeFilter(taper, len(taper)-1, kernelCenter, 2*self._nBins) selfContrib = pf.valueAt(pf.peakPosition()) return (npstat.polyFilter1DToNumpy(pf, kernelCenter, 2*self._nBins), selfContrib) def addKernel(self, symbetaPower, filterDegree, bandwidth): key = (symbetaPower, filterDegree, bandwidth) if key not in self._kernelTable: id 
class FFTKDE1D:
    def __init__(self, prototypeHisto, allowNegativeResults=True,
                 useMirroring=True):
        if prototypeHisto.dim() != 1:
            raise ValueError("Expected 1-d histogram as a prototype")
        if not prototypeHisto.isUniformlyBinned():
            raise ValueError("Expected uniformly binned histogram as a prototype")
        axis = prototypeHisto.axis(0)
        self._xmin = axis.min()
        self._xmax = axis.max()
        self._nBins = axis.nBins()
        self._binWidth = axis.binWidth(0)
        self._engine = npstat.ConvolutionEngine1D(2*self._nBins)
        self._bh = npstat.BoundaryHandling("BM_TRUNCATE")
        self._allowNegative = allowNegativeResults
        self._mirror = useMirroring
        self._kernelTable = dict()
        self._kernelProperties = list()
        self._directId = 2147483647
        self._directKernelKey = (0.0, 0.0, -1.0)
        self._gaussKernelNSigmas = 12.0

    def allowNegativeEstimates(self, b):
        self._allowNegative = b

    def isCompatible(self, h):
        if h.dim() != 1:
            return False
        if not h.isUniformlyBinned():
            return False
        axis = h.axis(0)
        return self._xmin == axis.min() and \
               self._xmax == axis.max() and \
               self._nBins == axis.nBins()

    def makeKernel(self, symbetaPower, filterDegree, bandwidth):
        assert filterDegree >= 0.0, "Filter degree can not be negative"
        assert bandwidth > 0.0, "Kernel bandwidth must be positive"
        if symbetaPower < 0.0:
            distro = npstat.TruncatedGauss1D(0.0, bandwidth, self._gaussKernelNSigmas)
        else:
            distro = npstat.SymmetricBeta1D(0.0, bandwidth, symbetaPower)
        builder = npstat.getBoundaryFilter1DBuilder(self._bh, distro, self._binWidth)
        taper = npstat.contDegreeTaper(filterDegree)
        kernelCenter = self._nBins - 1
        pf = builder.makeFilter(taper, len(taper)-1, kernelCenter, 2*self._nBins)
        selfContrib = pf.valueAt(pf.peakPosition())
        return (npstat.polyFilter1DToNumpy(pf, kernelCenter, 2*self._nBins),
                selfContrib)

    def addKernel(self, symbetaPower, filterDegree, bandwidth):
        key = (symbetaPower, filterDegree, bandwidth)
        if key not in self._kernelTable:
            id = len(self._kernelProperties)
            assert id < self._directId, "Too many kernels"
            k, sc = self.makeKernel(symbetaPower, filterDegree, bandwidth)
            self._engine.depositFilter(id, k, self._nBins+1)
            self._kernelTable[key] = id
            self._kernelProperties.append((symbetaPower, filterDegree, bandwidth, sc))
        return self._kernelTable[key]

    def nKernelsAdded(self):
        return len(self._kernelProperties)

    def _getKernelId(self, symbetaPower, filterDegree, bandwidth, addKern):
        assert bandwidth > 0.0, "Kernel bandwidth must be positive"
        if addKern:
            return self.addKernel(symbetaPower, filterDegree, bandwidth)
        key = (symbetaPower, filterDegree, bandwidth)
        if key in self._kernelTable:
            return self._kernelTable[key]
        if key != self._directKernelKey:
            k, sc = self.makeKernel(symbetaPower, filterDegree, bandwidth)
            self._engine.depositFilter(self._directId, k, self._nBins+1)
            self._directKernelKey = key
        return self._directId

    def setData(self, h):
        assert self.isCompatible(h), "Incompatible input histogram"
        self._engine.setFilter(npstat.doubleHistoBins(h, self._mirror))

    def _buildResult(self, id, makeNonNegative):
        arr = self._engine.convolveFilterAndDeposit(id, makeNonNegative)
        result = np.resize(arr, self._nBins)
        # The result we have now is normalized to the histogram area
        # in case of mirroring, and potentially has some leakage
        # if there is no mirroring. Renormalize everything to 1.
        area = np.sum(result)*self._binWidth
        assert area > 0.0, "Result can not be normalized. "\
                           "Was the input histogram filled?"
        result /= area
        return result

    def dataDensity(self, symbetaPower, filterDegree, bandwidth, addKern=True):
        assert self._engine.isFilterSet(), "Data is not set"
        id = self._getKernelId(symbetaPower, filterDegree, bandwidth, addKern)
        makeNonNegative = (filterDegree == 0.0 or (not self._allowNegative))
        return self._buildResult(id, makeNonNegative)

    def estimateDensity(self, h, *args):
        self.setData(h)
        return self.dataDensity(*args)
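    # Editorial note (illustration only): "setData" and "dataDensity"
    # separate data discretization from smoothing, so one histogram can be
    # smoothed repeatedly at different bandwidth values, e.g.:
    #
    #   smoother.setData(h)
    #   curves = [smoother.dataDensity(-1.0, 2, bw) for bw in (0.1, 0.2, 0.4)]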
-    def _buildCVCalc(self, h, cvType, *args):
-        binType = histoBinType(h)
-        if cvType == "LSCV":
-            return BandwidthCVLeastSquares1D(binType)
-        if cvType == "RLCV":
-            return BandwidthCVPseudoLogli1D(binType, args[0])
-        raise ValueError('Invalid CV type argument "{}". '
-                         'Expected RLCV or LSCV'.format(cvType))
-
-    def bestKernelCV(self, h, effectiveSampleSize, cvType, *args):
+    def bestKernelByCV(self, h, cvCalc):
+        """
+        Choose the best kernel among kernels added previously utilizing
+        cross-validation
+        """
        assert len(self._kernelProperties) > 0, "No kernels added"
        self.setData(h)
-        cvCalc = self._buildCVCalc(h, cvType, *args)
        bestCVValue = -1.7e308
        bestId = -1
        bestResult = None
        for id, properties in enumerate(self._kernelProperties):
            symbetaPower, filterDegree, bandwidth, sc = properties
            makeNonNegative = (filterDegree == 0.0 or (not self._allowNegative))
            result = self._buildResult(id, makeNonNegative)
-            dummyf = npstat.BasicPolyFilter1D(h.nBins(), sc)
-            cvValue = cvCalc.cvWeighted(h, effectiveSampleSize, result, dummyf)
+            dummyFilter = npstat.BasicPolyFilter1D(h.nBins(), sc)
+            cvValue = cvCalc(h, result, dummyFilter)
            if cvValue > bestCVValue:
                bestCVValue = cvValue
                bestId = id
                bestResult = result
        assert bestId >= 0, "Kernel optimization scan failed"
        symbetaPower, filterDegree, bandwidth, sc = self._kernelProperties[bestId]
        key = (symbetaPower, filterDegree, bandwidth)
        return (bestCVValue, key, bestResult)


def histoPluginBandwidth1D(h, sampleSize, symbetaPower, deg):
    if h.dim() != 1:
        raise ValueError("Expected 1-d histogram as an argument")
-    assert sampleSize > 0.0, "Sample size must be positive"
    assert deg >= 0.0, "Filter degree can not be negative"
+    if sampleSize <= 0.0:
+        sampleSize = h.nFillsInRange()
    qvalues = (0.15865525393145705, 0.84134474606854295)
    quantiles = npstat.histoQuantiles(h, 0, qvalues)
    sigmaEstimate = (quantiles[1] - quantiles[0])/2.0
    pluginBw = npstat.approxAmisePluginBwGauss(deg, sampleSize, sigmaEstimate)
    if symbetaPower >= 0.0:
        pluginBw *= npstat.approxSymbetaBandwidthRatio(symbetaPower, deg)
    return pluginBw
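# Endnote (editorial illustration only): with the change above, passing a
# non-positive sample size makes histoPluginBandwidth1D fall back to the
# number of fills in the histogram range, so for an unweighted sample the
# following two calls are equivalent:
#
#   bw1 = histoPluginBandwidth1D(h, h.nFillsInRange(), symbetaPower, degree)
#   bw2 = histoPluginBandwidth1D(h, 0, symbetaPower, degree)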