Index: trunk/examples/Python/lorpe_1d_features.py
===================================================================
--- trunk/examples/Python/lorpe_1d_features.py	(revision 787)
+++ trunk/examples/Python/lorpe_1d_features.py	(revision 788)
@@ -1,299 +1,299 @@
 #!/usr/bin/env python3
 """
 This example illustrates how to perform LOrPE on unbinned samples,
 including the following features:
 -- Weighted samples
 -- Variable bandwidth
 -- Local cross-validation
 """
 
 __author__="Igor Volobouev (i.volobouev@ttu.edu)"
-__version__="1.0"
+__version__="1.1"
 __date__ ="June 23 2022"
 
 # Perform necessary imports
 import math
 import npstat as ns
 import numpy as np
 from operator import itemgetter
 from npstat_utils import HistoND
 import matplotlib.pyplot as plt
 
 # Some parameters for the script
 sampleSize = 1000   # Number of points to generate (sample size)
 xmin = 0.0          # Left edge of the sample histogram
 xmax = 5.0          # Right edge of the sample histogram
 numBins = 50        # Number of bins for the sample histogram
 symbetaPower = 4    # Power parameter of the symmetric beta weight function
                     # w.r.t. which the orthogonal polynomial system will
                     # be generated. Use -1 to employ a Gaussian instead.
 nDiscrete = 1000    # Number of intervals for displaying the results.
 normalize = True    # Do we want to normalize the density estimate?
                     # Here, this only ensures that the integral is 1
                     # and does not check for negative densities.
 nIntegSplits = 50   # Number of intervals to use for various quadratures.
 nIntegPoints = 4    # Number of Gauss-Legendre integration points to use
                     # on each interval for various quadratures.
 nBwToTry = 50       # Number of bandwidth values to scan.
 
 # Choose boundary handling method. Only two methods are currently
 # supported for unbinned LOrPE:
 #
 #  "BM_TRUNCATE" -- Just truncate the weight function at the boundary.
 #
 #  "BM_FOLD"     -- Reflect (fold) the weight function at the boundary and
 #                   add to the non-reflected part.
 #
 # It is not obvious a priori which boundary handling method will perform
 # best for any particular case, so it might be useful to try different ones.
 bh = ns.BoundaryHandling("BM_TRUNCATE")
 
 # Generate a weighted sample of points on [xmin, xmax]. Here, we
 # use a flat distribution to generate the points and some other
 # distribution (a mixture of exponential and Gaussian) to assign
 # the weights.
 d1 = ns.Uniform1D(xmin, xmax - xmin)
 rng = ns.MersenneTwister()
 sampleCoords = d1.generate(rng, sampleSize)
 
 truncatedGauss = ns.TruncatedDistribution1D(ns.Gauss1D(2.0, 0.2), xmin, xmax)
 truncatedExponential = ns.TruncatedDistribution1D(ns.Exponential1D(0.0, 1.0), xmin, xmax)
 weightDistro = ns.DistributionMix1D()
 weightDistro.add(truncatedGauss, 0.3)
 weightDistro.add(truncatedExponential, 0.7)
 weights = [weightDistro.density(x) for x in sampleCoords]
 
 weightedSample = list(zip(sampleCoords, weights))
 
 # Histogram for displaying the weighted sample. Note that
 # the type of bins is double rather than integer.
 h1 = HistoND("double", ns.HistoAxis(numBins, xmin, xmax, "X"),
              "Sample Histogram", "Weight")
 h1.fill(weightedSample)
 
 # The two most general NPStat functions which create objects performing
 # unbinned LOrPE with cross-validation are called "LOrPE1DLSCVFunctor"
 # and "LOrPE1DRLCVFunctor". The former performs least squares
 # cross-validation (LSCV) and the latter regularized likelihood
 # cross-validation (RLCV). The function signatures are very similar.
 # The only difference is that "LOrPE1DRLCVFunctor" can take an extra
 # parameter playing the role of regularization strength. If the default
 # value of that parameter (0.5) is acceptable, the functions have identical
 # signatures.
 #
 # We will do the following here. Only non-negative values of regularization
 # parameter "rlcvAlpha" make sense for RLCV. So, if the parameter below is
 # negative, we use LSCV, otherwise we use RLCV. Try different values of
 # rlcvAlpha and see what happens.
 rlcvAlpha = -1.0
 if rlcvAlpha < 0.0:
     useRLCV = False
     lorpeFunctor = ns.LOrPE1DLSCVFunctor
 else:
     useRLCV = True
     lorpeFunctor = ns.LOrPE1DRLCVFunctor
 
 # Create the object which performs cross-validation. The function signature
 # looks as follows (see also C++ header file LOrPE1DCV.hh):
 #
 #      LOrPE1DLSCVFunctor(symbetaPower,
 #                         leftBoundary,
 #                         rightBoundary,
 #                         bh,
 #                         bandwidthFunctor,
 #                         localizingWeight,
 #                         localizingWeightXmin,
 #                         localizingWeightXmax,
 #                         nIntegIntervals,
 #                         nIntegPoints,
 #                         normalizeLOrPEEstimate,
 #                         sample)
 #
 # The following arguments must be provided:
 #
 #  symbetaPower           is the "power", p, of the symmetric Beta function
 #                         used to generate the smoothing kernels. It must
 #                         be an integer. If it is non-negative, the
 #                         function will look like (1 - x^2)^p. If it is
 #                         negative, the standard normal density is used
 #                         instead, truncated to the interval [-10, 10].
 #                         I typically use either -1 or 4 for this parameter.
 #
 #  leftBoundary           is the assumed left boundary of the support
 #                         of the density we are trying to estimate.
 #
 #  rightBoundary          is the assumed right boundary of the support
 #                         of the density we are trying to estimate.
 #
 #  bh                     is the boundary handling method. Currently the
 #                         kernel extending beyond the support boundary
 #                         can be handled by either truncating or folding
 #                         (i.e., mirroring).
 #
 #  bandwidthFunctor       This function specifies how the LOrPE bandwidth
 #                         depends on the coordinate. The value of this
 #                         function will be multiplied by the overall
 #                         scale, "bwFactor", in order to determine the
 #                         actual location-dependent bandwidth value.
 #                         bandwidthFunctor has to be ns.PyFunctor1 wrapping
 #                         a python callable. bandwidthFunctor can also be
 #                         just a python float (typically 1.0) if variable
 #                         bandwidth is not desired. Hopefully, this explanation
 #                         will be more clear from the examples.
 #
 #  localizingWeight       This function specifies how cross-validation
 #                         is localized. It needs to be either ns.PyFunctor1
 #                         wrapping a python callable or ns.DensityFunctor1D
 #                         object (density function of any 1-d probability
 #                         distribution implemented by NPStat). It can also be
 #                         just a python float (typically 1.0) if
 #                         cross-validation localization is not desired.
 #
 #  localizingWeightXmin   The cross-validation will only utilize sample
 #  localizingWeightXmax   points inside the interval
 #                         [localizingWeightXmin, localizingWeightXmax].
 #                         If you want to use global cross-validation,
 #                         set localizingWeightXmin to leftBoundary
 #                         (or smaller) and localizingWeightXmax to
 #                         rightBoundary (or larger).
 # 
 #  nIntegIntervals        These parameters specify how various quadratures
 #  nIntegPoints           are performed. "nIntegPoints" is the number of
 #                         points for the Gauss-Legendre quadrature, and
 #                         "nIntegIntervals" is the number of intervals
 #                         into which the support of the estimated density
 #                         is split (Gauss-Legendre quadrature is performed
 #                         separately on each interval). Note that
 #                         Gauss-Legendre quadratures with "nIntegPoints"
 #                         points exactly integrate polynomials up to degree
 #                         2*nIntegPoints - 1. If you expect that your
 #                         density estimate is going to be very smooth
 #                         (this can be expected for negative symbetaPower
 #                         values), use larger nIntegPoints and smaller
 #                         nIntegIntervals.
 #
 #  normalizeLOrPEEstimate If this parameter is "True", the code will
 #                         automatically normalize the LOrPE density
 #                         estimate area to 1 using numerical integration
 #                         (see parameters nIntegIntervals and nIntegPoints).
 #                         Note that this does not address the potential
 #                         problem of negative density values.
 #
 #  sample                 The sample of points for which density estimate
 #                         is constructed. These can be either unweighted,
 #                         in which case "sample" is typically a list of
 #                         doubles, or weighted, in which case "sample"
 #                         is typically a list of two-element tuples. In
 #                         these tuples, the first element is the coordinate
 #                         and the second element is the weight.
 #
 # LOrPE cross-validation objects can also be created by functions with
 # simpler signatures. Use "LOrPE1DGlobLSCVFunctor" if local cross-validation
 # is not needed and "LOrPE1DSimpleLSCVFunctor" if neither local
 # cross-validation nor location-dependent bandwidth is desired. Similar
 # finctions exist for RLCV. See the C++ header file LOrPE1DCV.hh for
 # exact signatures.
 
 # The example callable to use for bandwidth dependence on the coordinate
 class QuadraticBandwidth:
     def __init__(self, a, x0, y0):
         self.a = a
         self.x0 = x0
         self.y0 = y0
         assert self.y0 > 0.0
         assert self.a >= 0.0
 
     def __call__(self, x):
         delta = x - self.x0
         return self.a*delta*delta + self.y0
 
 bandwidthCallable = QuadraticBandwidth(1.0, 2.0, 1.0)
 bandwidthFunctor = ns.PyFunctor1(bandwidthCallable)
 
 # You can also try to uncomment the following in order
 # to have no bandwidth dependence on the coordinate:
 #
 # bandwidthFunctor = 1.0
 
 # Example distribution to use to localize the weight
 localizingDistro = ns.Gauss1D(2.0, 0.5)
 localizingWeight = ns.DensityFunctor1D(localizingDistro)
 localizingWeightXmin = localizingDistro.quantile(0.0)
 localizingWeightXmax = localizingDistro.quantile(1.0)
 
 # Create the LOrPE cross-validation object
 cv = lorpeFunctor(symbetaPower, xmin, xmax, bh, bandwidthFunctor,
                   localizingWeight, localizingWeightXmin, localizingWeightXmax,
                   nIntegSplits, nIntegPoints, normalize, weightedSample)
 
 # Check if we use RLCV and set the regularization parameter
 # if this is the case
 if useRLCV:
     cv.setRlcvAlpha(rlcvAlpha)
 
 # For simplicity, we will fix the filter degree here to 0
 # and scan the bandwidth without optimizing the degree.
 # This corresponds to using KDE with boundary correction.
 # One can, of course, also optimize the degree. See example
 # script "lorpe_1d_unbinned_cv.py" for this.
 filterDegree = 0
 bwmin = 0.02
 bwmax = 1.0
 bandwidthFactors = np.array(ns.EquidistantInLogSpace(bwmin, bwmax, nBwToTry),
                             dtype=np.double)
 cvValues = [cv(filterDegree, bw) for bw in bandwidthFactors]
 
 # Find the best bandwidth factor (in log space).
 # The function "findScanMaximum1D" will fit
 # a parabola around the maximum value.
 bestCV = ns.findScanMaximum1D(np.log(bandwidthFactors), cvValues)
 bestBWFactor = math.exp(bestCV.location())
 print("Best bandwidth factor for degree {:.1f} is {:.4f}".format(
     filterDegree, bestBWFactor))
 
 # Show the bandwidth scan results. Kill the plot window
 # to proceed beyond this point.
 plt.plot(bandwidthFactors, cvValues)
 plt.xscale('log')
 plt.xlabel('Bandwidth Factor')
 plt.ylabel('CV Function')
 plt.title('Cross-Validation Function Dependence on Bandwidth')
 plt.show()
 
 # Plot the dependence of the bandwidth on the coordinate
 xcoords = np.linspace(xmin, xmax, nDiscrete+1)
-bw = [bestBWFactor*bandwidthCallable(x) for x in xcoords]
+bw = [bestBWFactor*cv.bandwidthCurve(x) for x in xcoords]
 plt.plot(xcoords, bw)
 plt.xlabel('X')
 plt.ylabel('Bandwidth')
 plt.title('Bandwidth Dependence on Coordinate')
 plt.show()
 
 # Plot the sample histogram
 xh, yh = ns.histoOutline1D(h1)
 plt.plot(xh, yh, 'k')
 
 # Overlay the original density. Scale all densities so that
 # their plots are compatible with the histogram height.
 area = h1.integral()
 xdens, ydens = ns.scanDensity1D(weightDistro, xmin, xmax, nDiscrete+1)
 plt.plot(xdens, ydens*area, 'g', label='True density')
 
 # Overlay the unbinned LOrPE results
 x1, y1 = ns.scanDoubleFunctor1(cv.densityFunctor(filterDegree, bestBWFactor),
                                xmin, xmax, nDiscrete+1)
 label = "LOrPE "
 if useRLCV:
     label += "RLCV"
 else:
     label += "LSCV"
 plt.plot(x1, y1*area, 'r', label=label)
 
 # Add labels and comments
 plt.xlabel('X')
 plt.ylabel('Density estimate')
 plt.title('LOrPE with Local Cross-Validation')
 plt.legend()
 
 # Display the window
 plt.show()