No OneTemporary
Actions

Size

17 KB

Subscribers

None

View Options

	Index: trunk/npstat/stat/lorpeBackground1D.icc
	===================================================================
	--- trunk/npstat/stat/lorpeBackground1D.icc (revision 119)
	+++ trunk/npstat/stat/lorpeBackground1D.icc (revision 120)
	@@ -1,422 +1,422 @@
	#include <cmath>
	#include <cassert>
	#include <utility>
	#include <stdexcept>

	#include "npstat/nm/GaussLegendreQuadrature.hh"

	namespace npstat {
	namespace Private {
	inline void normalizeDoubleArrayAsDensity(double* arr, const unsigned n,
	const double binwidth)
	{
	long double integ = 0.0L;
	for (unsigned i=0; i<n; ++i)
	{
	if (arr[i] < 0.0)
	arr[i] = 0.0;
	integ += arr[i];
	}
	const double denom = integ*binwidth;
	if (denom <= 0.0)
	throw std::runtime_error(
	"In npstat::Private::normalizeDoubleArrayAsDensity: "
	"density array is nowhere positive");
	for (unsigned i=0; i<n; ++i)
	arr[i] /= denom;
	}
	}

	template<typename Numeric, typename NumIn, typename NumOut>
	unsigned lorpeBackground1D(
	const HistoND<Numeric>& histo, const AbsSymbetaFilterProvider& fbuilder,
	const AbsDistribution1D& signal, const double signalFraction,
	const unsigned nIntegrationPoints,
	const NumIn* initialApprox, const unsigned lenApproximation,
	const int m, const double bandwidth, const double maxDegree,
	const double convergenceEpsilon, const unsigned maxIterations,
	NumOut* signalDensity, const unsigned lenSignalDensity,
	NumOut* result, const unsigned lenResult,
	std::vector<double>& workspace,
	NumOut* densityMinusOne, const unsigned lenDensityMinusOne,
	const bool stretchFilterAtBoundary)
	{
	// Check the input arguments
	if (histo.dim() != 1U) throw std::invalid_argument(
	"In npstat::lorpeBackground1D: "
	"input histogram must be one-dimensional");
	const unsigned long npoints = histo.nFillsInRange();
	if (npoints == 0UL) throw std::invalid_argument(
	"In npstat::lorpeBackground1D: "
	"input histogram appears to be empty");
	const unsigned nbins = histo.nBins();
	assert(nbins);

	if (signalFraction < 0.0 \|\| signalFraction >= 1.0)
	throw std::invalid_argument(
	"In npstat::lorpeBackground1D: "
	"signal fraction is out of range");
	const double oneMinusSF = 1.0 - signalFraction;

	const bool haveInitialApprox = initialApprox && lenApproximation;
	if (haveInitialApprox)
	if (nbins != lenApproximation) throw std::invalid_argument(
	"In npstat::lorpeBackground1D: "
	"incorrect length of the initial approximation buffer");

	if (bandwidth <= 0.0) throw std::invalid_argument(
	"In npstat::lorpeBackground1D: bandwidth must be positive");

	if (convergenceEpsilon < 0.0) throw std::invalid_argument(
	"In npstat::lorpeBackground1D: "
	"convergence epsilon must be non-negative");

	const bool haveSignalBuffer = signalDensity && lenSignalDensity;
	if (haveSignalBuffer)
	if (!(nbins <= lenSignalDensity)) throw std::invalid_argument(
	"In npstat::lorpeBackground1D: "
	"insufficient length of the signal density buffer");

	const bool haveBackgroundBuffer = result && lenResult;
	if (haveBackgroundBuffer)
	if (!(nbins <= lenResult)) throw std::invalid_argument(
	"In npstat::lorpeBackground1D: "
	"insufficient length of the background density buffer");

	const bool haveMinusOneBuffer = densityMinusOne && lenDensityMinusOne;
	if (haveMinusOneBuffer)
	if (!(nbins <= lenDensityMinusOne)) throw std::invalid_argument(
	"In npstat::lorpeBackground1D: "
	"insufficient length of the cross validation output buffer");

	const ArrayND<Numeric>& binContents = histo.binContents();
	const double nEvents = binContents.template sum<long double>();
	if (nEvents < 2.0) throw std::invalid_argument(
	"In npstat::lorpeBackground1D: "
	"insufficient number of events in the input histogram");
	if (!binContents.isDensity()) throw std::invalid_argument(
	"In npstat::lorpeBackground1D: "
	"all bins of the input histogram must be non-negative");

	// Partition the workspace
	workspace.resize(5*nbins);
	double* signalScan = &workspace[0];
	double* previousApprox = signalScan + nbins;
	double* currentApprox = previousApprox + nbins;
	double* weightedInput = currentApprox + nbins;
	double* fittedBackground = weightedInput + nbins;

	// Scan the signal
	const HistoAxis& axis(histo.axis(0));
	const double binWidth = axis.binWidth(0);
	assert(binWidth > 0.0);
	if (nIntegrationPoints)
	{
	if (nIntegrationPoints == 1U)
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	signalScan[ibin] = signal.density(axis.binCenter(ibin));
	else
	{
	GaussLegendreQuadrature q(nIntegrationPoints);
	DensityFunctor1D df(signal);

	for (unsigned ibin=0; ibin<nbins; ++ibin)
	{
	const double xmin = axis.leftBinEdge(ibin);
	const double xmax = xmin + binWidth;
	signalScan[ibin] = q.integrate(df, xmin, xmax)/binWidth;
	}
	}
	}
	else
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	{
	const double xmin = axis.leftBinEdge(ibin);
	const double xmax = xmin + binWidth;
	signalScan[ibin] = (signal.cdf(xmax)-signal.cdf(xmin))/binWidth;
	}

	// Normalize the signal
	Private::normalizeDoubleArrayAsDensity(signalScan, nbins, binWidth);

	// Construct the filter
	CPP11_auto_ptr<LocalPolyFilter1D> filter = fbuilder.provideFilter(
	m, bandwidth, maxDegree, nbins, binWidth, stretchFilterAtBoundary);

	// Initialize the background approximation
	if (haveInitialApprox)
	{
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	previousApprox[ibin] = static_cast<double>(initialApprox[ibin]);
	Private::normalizeDoubleArrayAsDensity(
	previousApprox, nbins, binWidth);
	}
	else
	{
	const double d = 1.0/(nbins*binWidth);
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	previousApprox[ibin] = d;
	}

	// Iterative construction of the background approximation
	unsigned maxiter = 0;
	for (; maxiter < maxIterations; ++maxiter)
	{
	// Reweight the bin contents
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	{
	const double denom = oneMinusSF*previousApprox[ibin] +
	signalFraction*signalScan[ibin];
	double w = 0.0;
	if (denom > 0.0)
	w = previousApprox[ibin]/denom;
	weightedInput[ibin] = w*static_cast<double>(
	binContents.linearValue(ibin));
	}

	// Apply the filter to the reweighted contents
	filter->filter(weightedInput, nbins, currentApprox);

	// Normalize
	Private::normalizeDoubleArrayAsDensity(currentApprox,nbins,binWidth);

	// Calculate the L1 divergence with the previous iteration
	long double integ = 0.0L;
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	integ += fabs(previousApprox[ibin] - currentApprox[ibin]);
	integ *= binWidth;

	// Swap the array pointers to prepare for next iteration
	std::swap(previousApprox, currentApprox);

	// If we have reached the convergence target, we are done
	if (integ <= convergenceEpsilon)
	break;
	}

	// Fill out the output arrays
	if (haveSignalBuffer)
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	signalDensity[ibin] = static_cast<NumOut>(signalScan[ibin]);

	if (haveBackgroundBuffer)
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	result[ibin] = static_cast<NumOut>(previousApprox[ibin]);

	// Calculate the cross validation density is requested
	if (haveMinusOneBuffer)
	{
	std::swap(fittedBackground, previousApprox);

	const double nSignal = signalFraction*nEvents;
	- const double nBackg = nEvents - nSignal;
	+ // const double nBackg = nEvents - nSignal;

	for (unsigned minusBin=0; minusBin<nbins; ++minusBin)
	{
	const double binValue = static_cast<double>(
	binContents.linearValue(minusBin));
	double minusValue = binValue - 1.0;
	if (minusValue < 0.0)
	minusValue = 0.0;
	const double eventsRemoved = binValue - minusValue;

	if (!eventsRemoved)
	{
	densityMinusOne[minusBin] = static_cast<NumOut>(0);
	continue;
	}

	// When we perform "leaving-one-out" cross validation,
	// we do not know whether the removed event belongs to
	// the signal or to the background sample -- and the results
	// are going to be different. There are, in principle,
	// two ways to proceed here. We can either run two times
	// (once assuming that the removed event is signal and once
	// assuming that it is background) and combine these two
	// estimates later with appropriate weights or we can run
	// once assuming that the signal and bg fractions were
	// reduced according to their expectation from the original
	// fit. Here, the faster single pass approach is implemented.
	//
	const double sig = signalFraction*signalScan[minusBin];
	const double bg = oneMinusSF*fittedBackground[minusBin];
	const double denom = sig + bg;
	if (denom <= 0.0)
	{
	densityMinusOne[minusBin] = static_cast<NumOut>(0);
	continue;
	}
	const double sigRemoved = sig/denom*eventsRemoved;
	const double sigfrac = (nSignal - sigRemoved)/
	(nEvents - eventsRemoved);
	const double bgfrac = 1.0 - sigfrac;

	for (unsigned ibin=0; ibin<nbins; ++ibin)
	previousApprox[ibin] = fittedBackground[ibin];

	unsigned iter = 0;
	for (; iter < maxIterations; ++iter)
	{
	// Reweight the bin contents
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	{
	const double denom = bgfrac*previousApprox[ibin] +
	sigfrac*signalScan[ibin];
	double w = 0.0;
	if (denom > 0.0)
	w = previousApprox[ibin]/denom;
	if (ibin == minusBin)
	weightedInput[ibin] = w*minusValue;
	else
	weightedInput[ibin] = w*static_cast<double>(
	binContents.linearValue(ibin));
	}

	// Apply the filter to the reweighted contents
	filter->filter(weightedInput, nbins, currentApprox);

	// Normalize
	Private::normalizeDoubleArrayAsDensity(
	currentApprox, nbins, binWidth);

	// Calculate the L1 divergence with the previous cycle
	long double integ = 0.0L;
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	integ += fabs(previousApprox[ibin] -
	currentApprox[ibin]);
	integ *= binWidth;

	// Swap the array pointers to prepare for next iteration
	std::swap(previousApprox, currentApprox);

	// If we have reached the convergence target, we are done
	if (integ <= convergenceEpsilon)
	break;
	}

	if (iter > maxiter)
	maxiter = iter;

	densityMinusOne[minusBin] =
	static_cast<NumOut>(previousApprox[minusBin]);
	}
	}

	return maxiter;
	}


	template<typename Numeric, typename NumOut>
	double lorpeBgCVPseudoLogli1D(
	const HistoND<Numeric>& histo, const double signalFraction,
	const NumOut* signalDensity, const unsigned lenSignalDensity,
	const NumOut* bgDensity, const unsigned lenBgDensity,
	const NumOut* densityMinusOne, const unsigned lenDensityMinusOne,
	const double minLog)
	{
	const long double minDensity = expl(minLog);

	if (histo.dim() != 1U) throw std::invalid_argument(
	"In npstat::lorpeBgCVPseudoLogli1D: "
	"input histogram must be one-dimensional");
	const unsigned long npoints = histo.nFillsInRange();
	if (npoints == 0UL) throw std::invalid_argument(
	"In npstat::lorpeBgCVPseudoLogli1D: "
	"input histogram appears to be empty");
	const unsigned nbins = histo.nBins();
	assert(nbins);

	if (signalFraction < 0.0 \|\| signalFraction >= 1.0)
	throw std::invalid_argument(
	"In npstat::lorpeBgCVPseudoLogli1D: "
	"signal fraction is out of range");
	const double oneMinusSF = 1.0 - signalFraction;

	assert(signalDensity);
	assert(lenSignalDensity >= nbins);
	assert(bgDensity);
	assert(lenBgDensity >= nbins);
	assert(densityMinusOne);
	assert(lenDensityMinusOne >= nbins);

	const ArrayND<Numeric>& binContents = histo.binContents();

	long double ll = 0.0L, logli = 0.0L;
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	{
	const double binValue = static_cast<double>(
	binContents.linearValue(ibin));
	if (binValue > 0.0)
	{
	const long double dens =
	static_cast<long double>(densityMinusOne[ibin]);
	if (dens <= minDensity)
	ll = minLog;
	else
	ll = logl(dens);

	const double bg = static_cast<double>(bgDensity[ibin]);
	const double sig = static_cast<double>(signalDensity[ibin]);
	const double denom = oneMinusSFbg + signalFractionsig;

	if (denom > 0.0)
	logli += llbinValuebg/denom;
	}
	}

	return logli;
	}


	template<typename Numeric, typename NumOut>
	double lorpeBgLogli1D(
	const HistoND<Numeric>& histo, const double signalFraction,
	const NumOut* signalDensity, const unsigned lenSignalDensity,
	const NumOut* bgDensity, const unsigned lenBgDensity,
	const double minLog)
	{
	const long double minDensity = expl(minLog);

	if (histo.dim() != 1U) throw std::invalid_argument(
	"In npstat::lorpeBgLogli1D: "
	"input histogram must be one-dimensional");
	const unsigned long npoints = histo.nFillsInRange();
	if (npoints == 0UL) throw std::invalid_argument(
	"In npstat::lorpeBgLogli1D: "
	"input histogram appears to be empty");
	const unsigned nbins = histo.nBins();
	assert(nbins);

	if (signalFraction < 0.0 \|\| signalFraction >= 1.0)
	throw std::invalid_argument(
	"In npstat::lorpeBgLogli1D: "
	"signal fraction is out of range");
	const double oneMinusSF = 1.0 - signalFraction;

	assert(signalDensity);
	assert(lenSignalDensity >= nbins);
	assert(bgDensity);
	assert(lenBgDensity >= nbins);

	const ArrayND<Numeric>& binContents = histo.binContents();

	long double ll = 0.0L, logli = 0.0L;
	for (unsigned ibin=0; ibin<nbins; ++ibin)
	{
	const double binValue = static_cast<double>(
	binContents.linearValue(ibin));
	if (binValue > 0.0)
	{
	const long double dens = signalFraction*signalDensity[ibin] +
	oneMinusSF*bgDensity[ibin];
	if (dens <= minDensity)
	ll = minLog;
	else
	ll = logl(dens);
	logli += ll*binValue;
	}
	}

	return logli;
	}
	}

File Metadata

Mime Type: text/x-diff
Expires: Tue, Nov 19, 4:27 PM (1 d, 14 h)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 3805169
Default Alt Text: (17 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions