Page MenuHomeHEPForge

No OneTemporary

Index: trunk/npstat/stat/lorpeBackground1D.icc
===================================================================
--- trunk/npstat/stat/lorpeBackground1D.icc (revision 119)
+++ trunk/npstat/stat/lorpeBackground1D.icc (revision 120)
@@ -1,422 +1,422 @@
#include <cmath>
#include <cassert>
#include <utility>
#include <stdexcept>
#include "npstat/nm/GaussLegendreQuadrature.hh"
namespace npstat {
namespace Private {
inline void normalizeDoubleArrayAsDensity(double* arr, const unsigned n,
const double binwidth)
{
long double integ = 0.0L;
for (unsigned i=0; i<n; ++i)
{
if (arr[i] < 0.0)
arr[i] = 0.0;
integ += arr[i];
}
const double denom = integ*binwidth;
if (denom <= 0.0)
throw std::runtime_error(
"In npstat::Private::normalizeDoubleArrayAsDensity: "
"density array is nowhere positive");
for (unsigned i=0; i<n; ++i)
arr[i] /= denom;
}
}
template<typename Numeric, typename NumIn, typename NumOut>
unsigned lorpeBackground1D(
const HistoND<Numeric>& histo, const AbsSymbetaFilterProvider& fbuilder,
const AbsDistribution1D& signal, const double signalFraction,
const unsigned nIntegrationPoints,
const NumIn* initialApprox, const unsigned lenApproximation,
const int m, const double bandwidth, const double maxDegree,
const double convergenceEpsilon, const unsigned maxIterations,
NumOut* signalDensity, const unsigned lenSignalDensity,
NumOut* result, const unsigned lenResult,
std::vector<double>& workspace,
NumOut* densityMinusOne, const unsigned lenDensityMinusOne,
const bool stretchFilterAtBoundary)
{
// Check the input arguments
if (histo.dim() != 1U) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"input histogram must be one-dimensional");
const unsigned long npoints = histo.nFillsInRange();
if (npoints == 0UL) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"input histogram appears to be empty");
const unsigned nbins = histo.nBins();
assert(nbins);
if (signalFraction < 0.0 || signalFraction >= 1.0)
throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"signal fraction is out of range");
const double oneMinusSF = 1.0 - signalFraction;
const bool haveInitialApprox = initialApprox && lenApproximation;
if (haveInitialApprox)
if (nbins != lenApproximation) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"incorrect length of the initial approximation buffer");
if (bandwidth <= 0.0) throw std::invalid_argument(
"In npstat::lorpeBackground1D: bandwidth must be positive");
if (convergenceEpsilon < 0.0) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"convergence epsilon must be non-negative");
const bool haveSignalBuffer = signalDensity && lenSignalDensity;
if (haveSignalBuffer)
if (!(nbins <= lenSignalDensity)) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"insufficient length of the signal density buffer");
const bool haveBackgroundBuffer = result && lenResult;
if (haveBackgroundBuffer)
if (!(nbins <= lenResult)) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"insufficient length of the background density buffer");
const bool haveMinusOneBuffer = densityMinusOne && lenDensityMinusOne;
if (haveMinusOneBuffer)
if (!(nbins <= lenDensityMinusOne)) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"insufficient length of the cross validation output buffer");
const ArrayND<Numeric>& binContents = histo.binContents();
const double nEvents = binContents.template sum<long double>();
if (nEvents < 2.0) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"insufficient number of events in the input histogram");
if (!binContents.isDensity()) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"all bins of the input histogram must be non-negative");
// Partition the workspace
workspace.resize(5*nbins);
double* signalScan = &workspace[0];
double* previousApprox = signalScan + nbins;
double* currentApprox = previousApprox + nbins;
double* weightedInput = currentApprox + nbins;
double* fittedBackground = weightedInput + nbins;
// Scan the signal
const HistoAxis& axis(histo.axis(0));
const double binWidth = axis.binWidth(0);
assert(binWidth > 0.0);
if (nIntegrationPoints)
{
if (nIntegrationPoints == 1U)
for (unsigned ibin=0; ibin<nbins; ++ibin)
signalScan[ibin] = signal.density(axis.binCenter(ibin));
else
{
GaussLegendreQuadrature q(nIntegrationPoints);
DensityFunctor1D df(signal);
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double xmin = axis.leftBinEdge(ibin);
const double xmax = xmin + binWidth;
signalScan[ibin] = q.integrate(df, xmin, xmax)/binWidth;
}
}
}
else
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double xmin = axis.leftBinEdge(ibin);
const double xmax = xmin + binWidth;
signalScan[ibin] = (signal.cdf(xmax)-signal.cdf(xmin))/binWidth;
}
// Normalize the signal
Private::normalizeDoubleArrayAsDensity(signalScan, nbins, binWidth);
// Construct the filter
CPP11_auto_ptr<LocalPolyFilter1D> filter = fbuilder.provideFilter(
m, bandwidth, maxDegree, nbins, binWidth, stretchFilterAtBoundary);
// Initialize the background approximation
if (haveInitialApprox)
{
for (unsigned ibin=0; ibin<nbins; ++ibin)
previousApprox[ibin] = static_cast<double>(initialApprox[ibin]);
Private::normalizeDoubleArrayAsDensity(
previousApprox, nbins, binWidth);
}
else
{
const double d = 1.0/(nbins*binWidth);
for (unsigned ibin=0; ibin<nbins; ++ibin)
previousApprox[ibin] = d;
}
// Iterative construction of the background approximation
unsigned maxiter = 0;
for (; maxiter < maxIterations; ++maxiter)
{
// Reweight the bin contents
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double denom = oneMinusSF*previousApprox[ibin] +
signalFraction*signalScan[ibin];
double w = 0.0;
if (denom > 0.0)
w = previousApprox[ibin]/denom;
weightedInput[ibin] = w*static_cast<double>(
binContents.linearValue(ibin));
}
// Apply the filter to the reweighted contents
filter->filter(weightedInput, nbins, currentApprox);
// Normalize
Private::normalizeDoubleArrayAsDensity(currentApprox,nbins,binWidth);
// Calculate the L1 divergence with the previous iteration
long double integ = 0.0L;
for (unsigned ibin=0; ibin<nbins; ++ibin)
integ += fabs(previousApprox[ibin] - currentApprox[ibin]);
integ *= binWidth;
// Swap the array pointers to prepare for next iteration
std::swap(previousApprox, currentApprox);
// If we have reached the convergence target, we are done
if (integ <= convergenceEpsilon)
break;
}
// Fill out the output arrays
if (haveSignalBuffer)
for (unsigned ibin=0; ibin<nbins; ++ibin)
signalDensity[ibin] = static_cast<NumOut>(signalScan[ibin]);
if (haveBackgroundBuffer)
for (unsigned ibin=0; ibin<nbins; ++ibin)
result[ibin] = static_cast<NumOut>(previousApprox[ibin]);
// Calculate the cross validation density is requested
if (haveMinusOneBuffer)
{
std::swap(fittedBackground, previousApprox);
const double nSignal = signalFraction*nEvents;
- const double nBackg = nEvents - nSignal;
+ // const double nBackg = nEvents - nSignal;
for (unsigned minusBin=0; minusBin<nbins; ++minusBin)
{
const double binValue = static_cast<double>(
binContents.linearValue(minusBin));
double minusValue = binValue - 1.0;
if (minusValue < 0.0)
minusValue = 0.0;
const double eventsRemoved = binValue - minusValue;
if (!eventsRemoved)
{
densityMinusOne[minusBin] = static_cast<NumOut>(0);
continue;
}
// When we perform "leaving-one-out" cross validation,
// we do not know whether the removed event belongs to
// the signal or to the background sample -- and the results
// are going to be different. There are, in principle,
// two ways to proceed here. We can either run two times
// (once assuming that the removed event is signal and once
// assuming that it is background) and combine these two
// estimates later with appropriate weights or we can run
// once assuming that the signal and bg fractions were
// reduced according to their expectation from the original
// fit. Here, the faster single pass approach is implemented.
//
const double sig = signalFraction*signalScan[minusBin];
const double bg = oneMinusSF*fittedBackground[minusBin];
const double denom = sig + bg;
if (denom <= 0.0)
{
densityMinusOne[minusBin] = static_cast<NumOut>(0);
continue;
}
const double sigRemoved = sig/denom*eventsRemoved;
const double sigfrac = (nSignal - sigRemoved)/
(nEvents - eventsRemoved);
const double bgfrac = 1.0 - sigfrac;
for (unsigned ibin=0; ibin<nbins; ++ibin)
previousApprox[ibin] = fittedBackground[ibin];
unsigned iter = 0;
for (; iter < maxIterations; ++iter)
{
// Reweight the bin contents
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double denom = bgfrac*previousApprox[ibin] +
sigfrac*signalScan[ibin];
double w = 0.0;
if (denom > 0.0)
w = previousApprox[ibin]/denom;
if (ibin == minusBin)
weightedInput[ibin] = w*minusValue;
else
weightedInput[ibin] = w*static_cast<double>(
binContents.linearValue(ibin));
}
// Apply the filter to the reweighted contents
filter->filter(weightedInput, nbins, currentApprox);
// Normalize
Private::normalizeDoubleArrayAsDensity(
currentApprox, nbins, binWidth);
// Calculate the L1 divergence with the previous cycle
long double integ = 0.0L;
for (unsigned ibin=0; ibin<nbins; ++ibin)
integ += fabs(previousApprox[ibin] -
currentApprox[ibin]);
integ *= binWidth;
// Swap the array pointers to prepare for next iteration
std::swap(previousApprox, currentApprox);
// If we have reached the convergence target, we are done
if (integ <= convergenceEpsilon)
break;
}
if (iter > maxiter)
maxiter = iter;
densityMinusOne[minusBin] =
static_cast<NumOut>(previousApprox[minusBin]);
}
}
return maxiter;
}
template<typename Numeric, typename NumOut>
double lorpeBgCVPseudoLogli1D(
const HistoND<Numeric>& histo, const double signalFraction,
const NumOut* signalDensity, const unsigned lenSignalDensity,
const NumOut* bgDensity, const unsigned lenBgDensity,
const NumOut* densityMinusOne, const unsigned lenDensityMinusOne,
const double minLog)
{
const long double minDensity = expl(minLog);
if (histo.dim() != 1U) throw std::invalid_argument(
"In npstat::lorpeBgCVPseudoLogli1D: "
"input histogram must be one-dimensional");
const unsigned long npoints = histo.nFillsInRange();
if (npoints == 0UL) throw std::invalid_argument(
"In npstat::lorpeBgCVPseudoLogli1D: "
"input histogram appears to be empty");
const unsigned nbins = histo.nBins();
assert(nbins);
if (signalFraction < 0.0 || signalFraction >= 1.0)
throw std::invalid_argument(
"In npstat::lorpeBgCVPseudoLogli1D: "
"signal fraction is out of range");
const double oneMinusSF = 1.0 - signalFraction;
assert(signalDensity);
assert(lenSignalDensity >= nbins);
assert(bgDensity);
assert(lenBgDensity >= nbins);
assert(densityMinusOne);
assert(lenDensityMinusOne >= nbins);
const ArrayND<Numeric>& binContents = histo.binContents();
long double ll = 0.0L, logli = 0.0L;
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double binValue = static_cast<double>(
binContents.linearValue(ibin));
if (binValue > 0.0)
{
const long double dens =
static_cast<long double>(densityMinusOne[ibin]);
if (dens <= minDensity)
ll = minLog;
else
ll = logl(dens);
const double bg = static_cast<double>(bgDensity[ibin]);
const double sig = static_cast<double>(signalDensity[ibin]);
const double denom = oneMinusSF*bg + signalFraction*sig;
if (denom > 0.0)
logli += ll*binValue*bg/denom;
}
}
return logli;
}
template<typename Numeric, typename NumOut>
double lorpeBgLogli1D(
const HistoND<Numeric>& histo, const double signalFraction,
const NumOut* signalDensity, const unsigned lenSignalDensity,
const NumOut* bgDensity, const unsigned lenBgDensity,
const double minLog)
{
const long double minDensity = expl(minLog);
if (histo.dim() != 1U) throw std::invalid_argument(
"In npstat::lorpeBgLogli1D: "
"input histogram must be one-dimensional");
const unsigned long npoints = histo.nFillsInRange();
if (npoints == 0UL) throw std::invalid_argument(
"In npstat::lorpeBgLogli1D: "
"input histogram appears to be empty");
const unsigned nbins = histo.nBins();
assert(nbins);
if (signalFraction < 0.0 || signalFraction >= 1.0)
throw std::invalid_argument(
"In npstat::lorpeBgLogli1D: "
"signal fraction is out of range");
const double oneMinusSF = 1.0 - signalFraction;
assert(signalDensity);
assert(lenSignalDensity >= nbins);
assert(bgDensity);
assert(lenBgDensity >= nbins);
const ArrayND<Numeric>& binContents = histo.binContents();
long double ll = 0.0L, logli = 0.0L;
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double binValue = static_cast<double>(
binContents.linearValue(ibin));
if (binValue > 0.0)
{
const long double dens = signalFraction*signalDensity[ibin] +
oneMinusSF*bgDensity[ibin];
if (dens <= minDensity)
ll = minLog;
else
ll = logl(dens);
logli += ll*binValue;
}
}
return logli;
}
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Nov 19, 4:27 PM (1 d, 14 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3805169
Default Alt Text
(17 KB)

Event Timeline