Page Menu
Home
HEPForge
Search
Configure Global Search
Log In
Files
F7877838
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
17 KB
Subscribers
None
View Options
Index: trunk/npstat/stat/lorpeBackground1D.icc
===================================================================
--- trunk/npstat/stat/lorpeBackground1D.icc (revision 119)
+++ trunk/npstat/stat/lorpeBackground1D.icc (revision 120)
@@ -1,422 +1,422 @@
#include <cmath>
#include <cassert>
#include <utility>
#include <stdexcept>
#include "npstat/nm/GaussLegendreQuadrature.hh"
namespace npstat {
namespace Private {
inline void normalizeDoubleArrayAsDensity(double* arr, const unsigned n,
const double binwidth)
{
long double integ = 0.0L;
for (unsigned i=0; i<n; ++i)
{
if (arr[i] < 0.0)
arr[i] = 0.0;
integ += arr[i];
}
const double denom = integ*binwidth;
if (denom <= 0.0)
throw std::runtime_error(
"In npstat::Private::normalizeDoubleArrayAsDensity: "
"density array is nowhere positive");
for (unsigned i=0; i<n; ++i)
arr[i] /= denom;
}
}
template<typename Numeric, typename NumIn, typename NumOut>
unsigned lorpeBackground1D(
const HistoND<Numeric>& histo, const AbsSymbetaFilterProvider& fbuilder,
const AbsDistribution1D& signal, const double signalFraction,
const unsigned nIntegrationPoints,
const NumIn* initialApprox, const unsigned lenApproximation,
const int m, const double bandwidth, const double maxDegree,
const double convergenceEpsilon, const unsigned maxIterations,
NumOut* signalDensity, const unsigned lenSignalDensity,
NumOut* result, const unsigned lenResult,
std::vector<double>& workspace,
NumOut* densityMinusOne, const unsigned lenDensityMinusOne,
const bool stretchFilterAtBoundary)
{
// Check the input arguments
if (histo.dim() != 1U) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"input histogram must be one-dimensional");
const unsigned long npoints = histo.nFillsInRange();
if (npoints == 0UL) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"input histogram appears to be empty");
const unsigned nbins = histo.nBins();
assert(nbins);
if (signalFraction < 0.0 || signalFraction >= 1.0)
throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"signal fraction is out of range");
const double oneMinusSF = 1.0 - signalFraction;
const bool haveInitialApprox = initialApprox && lenApproximation;
if (haveInitialApprox)
if (nbins != lenApproximation) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"incorrect length of the initial approximation buffer");
if (bandwidth <= 0.0) throw std::invalid_argument(
"In npstat::lorpeBackground1D: bandwidth must be positive");
if (convergenceEpsilon < 0.0) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"convergence epsilon must be non-negative");
const bool haveSignalBuffer = signalDensity && lenSignalDensity;
if (haveSignalBuffer)
if (!(nbins <= lenSignalDensity)) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"insufficient length of the signal density buffer");
const bool haveBackgroundBuffer = result && lenResult;
if (haveBackgroundBuffer)
if (!(nbins <= lenResult)) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"insufficient length of the background density buffer");
const bool haveMinusOneBuffer = densityMinusOne && lenDensityMinusOne;
if (haveMinusOneBuffer)
if (!(nbins <= lenDensityMinusOne)) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"insufficient length of the cross validation output buffer");
const ArrayND<Numeric>& binContents = histo.binContents();
const double nEvents = binContents.template sum<long double>();
if (nEvents < 2.0) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"insufficient number of events in the input histogram");
if (!binContents.isDensity()) throw std::invalid_argument(
"In npstat::lorpeBackground1D: "
"all bins of the input histogram must be non-negative");
// Partition the workspace
workspace.resize(5*nbins);
double* signalScan = &workspace[0];
double* previousApprox = signalScan + nbins;
double* currentApprox = previousApprox + nbins;
double* weightedInput = currentApprox + nbins;
double* fittedBackground = weightedInput + nbins;
// Scan the signal
const HistoAxis& axis(histo.axis(0));
const double binWidth = axis.binWidth(0);
assert(binWidth > 0.0);
if (nIntegrationPoints)
{
if (nIntegrationPoints == 1U)
for (unsigned ibin=0; ibin<nbins; ++ibin)
signalScan[ibin] = signal.density(axis.binCenter(ibin));
else
{
GaussLegendreQuadrature q(nIntegrationPoints);
DensityFunctor1D df(signal);
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double xmin = axis.leftBinEdge(ibin);
const double xmax = xmin + binWidth;
signalScan[ibin] = q.integrate(df, xmin, xmax)/binWidth;
}
}
}
else
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double xmin = axis.leftBinEdge(ibin);
const double xmax = xmin + binWidth;
signalScan[ibin] = (signal.cdf(xmax)-signal.cdf(xmin))/binWidth;
}
// Normalize the signal
Private::normalizeDoubleArrayAsDensity(signalScan, nbins, binWidth);
// Construct the filter
CPP11_auto_ptr<LocalPolyFilter1D> filter = fbuilder.provideFilter(
m, bandwidth, maxDegree, nbins, binWidth, stretchFilterAtBoundary);
// Initialize the background approximation
if (haveInitialApprox)
{
for (unsigned ibin=0; ibin<nbins; ++ibin)
previousApprox[ibin] = static_cast<double>(initialApprox[ibin]);
Private::normalizeDoubleArrayAsDensity(
previousApprox, nbins, binWidth);
}
else
{
const double d = 1.0/(nbins*binWidth);
for (unsigned ibin=0; ibin<nbins; ++ibin)
previousApprox[ibin] = d;
}
// Iterative construction of the background approximation
unsigned maxiter = 0;
for (; maxiter < maxIterations; ++maxiter)
{
// Reweight the bin contents
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double denom = oneMinusSF*previousApprox[ibin] +
signalFraction*signalScan[ibin];
double w = 0.0;
if (denom > 0.0)
w = previousApprox[ibin]/denom;
weightedInput[ibin] = w*static_cast<double>(
binContents.linearValue(ibin));
}
// Apply the filter to the reweighted contents
filter->filter(weightedInput, nbins, currentApprox);
// Normalize
Private::normalizeDoubleArrayAsDensity(currentApprox,nbins,binWidth);
// Calculate the L1 divergence with the previous iteration
long double integ = 0.0L;
for (unsigned ibin=0; ibin<nbins; ++ibin)
integ += fabs(previousApprox[ibin] - currentApprox[ibin]);
integ *= binWidth;
// Swap the array pointers to prepare for next iteration
std::swap(previousApprox, currentApprox);
// If we have reached the convergence target, we are done
if (integ <= convergenceEpsilon)
break;
}
// Fill out the output arrays
if (haveSignalBuffer)
for (unsigned ibin=0; ibin<nbins; ++ibin)
signalDensity[ibin] = static_cast<NumOut>(signalScan[ibin]);
if (haveBackgroundBuffer)
for (unsigned ibin=0; ibin<nbins; ++ibin)
result[ibin] = static_cast<NumOut>(previousApprox[ibin]);
// Calculate the cross validation density is requested
if (haveMinusOneBuffer)
{
std::swap(fittedBackground, previousApprox);
const double nSignal = signalFraction*nEvents;
- const double nBackg = nEvents - nSignal;
+ // const double nBackg = nEvents - nSignal;
for (unsigned minusBin=0; minusBin<nbins; ++minusBin)
{
const double binValue = static_cast<double>(
binContents.linearValue(minusBin));
double minusValue = binValue - 1.0;
if (minusValue < 0.0)
minusValue = 0.0;
const double eventsRemoved = binValue - minusValue;
if (!eventsRemoved)
{
densityMinusOne[minusBin] = static_cast<NumOut>(0);
continue;
}
// When we perform "leaving-one-out" cross validation,
// we do not know whether the removed event belongs to
// the signal or to the background sample -- and the results
// are going to be different. There are, in principle,
// two ways to proceed here. We can either run two times
// (once assuming that the removed event is signal and once
// assuming that it is background) and combine these two
// estimates later with appropriate weights or we can run
// once assuming that the signal and bg fractions were
// reduced according to their expectation from the original
// fit. Here, the faster single pass approach is implemented.
//
const double sig = signalFraction*signalScan[minusBin];
const double bg = oneMinusSF*fittedBackground[minusBin];
const double denom = sig + bg;
if (denom <= 0.0)
{
densityMinusOne[minusBin] = static_cast<NumOut>(0);
continue;
}
const double sigRemoved = sig/denom*eventsRemoved;
const double sigfrac = (nSignal - sigRemoved)/
(nEvents - eventsRemoved);
const double bgfrac = 1.0 - sigfrac;
for (unsigned ibin=0; ibin<nbins; ++ibin)
previousApprox[ibin] = fittedBackground[ibin];
unsigned iter = 0;
for (; iter < maxIterations; ++iter)
{
// Reweight the bin contents
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double denom = bgfrac*previousApprox[ibin] +
sigfrac*signalScan[ibin];
double w = 0.0;
if (denom > 0.0)
w = previousApprox[ibin]/denom;
if (ibin == minusBin)
weightedInput[ibin] = w*minusValue;
else
weightedInput[ibin] = w*static_cast<double>(
binContents.linearValue(ibin));
}
// Apply the filter to the reweighted contents
filter->filter(weightedInput, nbins, currentApprox);
// Normalize
Private::normalizeDoubleArrayAsDensity(
currentApprox, nbins, binWidth);
// Calculate the L1 divergence with the previous cycle
long double integ = 0.0L;
for (unsigned ibin=0; ibin<nbins; ++ibin)
integ += fabs(previousApprox[ibin] -
currentApprox[ibin]);
integ *= binWidth;
// Swap the array pointers to prepare for next iteration
std::swap(previousApprox, currentApprox);
// If we have reached the convergence target, we are done
if (integ <= convergenceEpsilon)
break;
}
if (iter > maxiter)
maxiter = iter;
densityMinusOne[minusBin] =
static_cast<NumOut>(previousApprox[minusBin]);
}
}
return maxiter;
}
template<typename Numeric, typename NumOut>
double lorpeBgCVPseudoLogli1D(
const HistoND<Numeric>& histo, const double signalFraction,
const NumOut* signalDensity, const unsigned lenSignalDensity,
const NumOut* bgDensity, const unsigned lenBgDensity,
const NumOut* densityMinusOne, const unsigned lenDensityMinusOne,
const double minLog)
{
const long double minDensity = expl(minLog);
if (histo.dim() != 1U) throw std::invalid_argument(
"In npstat::lorpeBgCVPseudoLogli1D: "
"input histogram must be one-dimensional");
const unsigned long npoints = histo.nFillsInRange();
if (npoints == 0UL) throw std::invalid_argument(
"In npstat::lorpeBgCVPseudoLogli1D: "
"input histogram appears to be empty");
const unsigned nbins = histo.nBins();
assert(nbins);
if (signalFraction < 0.0 || signalFraction >= 1.0)
throw std::invalid_argument(
"In npstat::lorpeBgCVPseudoLogli1D: "
"signal fraction is out of range");
const double oneMinusSF = 1.0 - signalFraction;
assert(signalDensity);
assert(lenSignalDensity >= nbins);
assert(bgDensity);
assert(lenBgDensity >= nbins);
assert(densityMinusOne);
assert(lenDensityMinusOne >= nbins);
const ArrayND<Numeric>& binContents = histo.binContents();
long double ll = 0.0L, logli = 0.0L;
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double binValue = static_cast<double>(
binContents.linearValue(ibin));
if (binValue > 0.0)
{
const long double dens =
static_cast<long double>(densityMinusOne[ibin]);
if (dens <= minDensity)
ll = minLog;
else
ll = logl(dens);
const double bg = static_cast<double>(bgDensity[ibin]);
const double sig = static_cast<double>(signalDensity[ibin]);
const double denom = oneMinusSF*bg + signalFraction*sig;
if (denom > 0.0)
logli += ll*binValue*bg/denom;
}
}
return logli;
}
template<typename Numeric, typename NumOut>
double lorpeBgLogli1D(
const HistoND<Numeric>& histo, const double signalFraction,
const NumOut* signalDensity, const unsigned lenSignalDensity,
const NumOut* bgDensity, const unsigned lenBgDensity,
const double minLog)
{
const long double minDensity = expl(minLog);
if (histo.dim() != 1U) throw std::invalid_argument(
"In npstat::lorpeBgLogli1D: "
"input histogram must be one-dimensional");
const unsigned long npoints = histo.nFillsInRange();
if (npoints == 0UL) throw std::invalid_argument(
"In npstat::lorpeBgLogli1D: "
"input histogram appears to be empty");
const unsigned nbins = histo.nBins();
assert(nbins);
if (signalFraction < 0.0 || signalFraction >= 1.0)
throw std::invalid_argument(
"In npstat::lorpeBgLogli1D: "
"signal fraction is out of range");
const double oneMinusSF = 1.0 - signalFraction;
assert(signalDensity);
assert(lenSignalDensity >= nbins);
assert(bgDensity);
assert(lenBgDensity >= nbins);
const ArrayND<Numeric>& binContents = histo.binContents();
long double ll = 0.0L, logli = 0.0L;
for (unsigned ibin=0; ibin<nbins; ++ibin)
{
const double binValue = static_cast<double>(
binContents.linearValue(ibin));
if (binValue > 0.0)
{
const long double dens = signalFraction*signalDensity[ibin] +
oneMinusSF*bgDensity[ibin];
if (dens <= minDensity)
ll = minLog;
else
ll = logl(dens);
logli += ll*binValue;
}
}
return logli;
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Nov 19, 4:27 PM (1 d, 14 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3805169
Default Alt Text
(17 KB)
Attached To
rNPSTATSVN npstatsvn
Event Timeline
Log In to Comment