Index: trunk/tests/test_Distributions1D.cc =================================================================== --- trunk/tests/test_Distributions1D.cc (revision 595) +++ trunk/tests/test_Distributions1D.cc (revision 596) @@ -1,1809 +1,1860 @@ #include #include #include "UnitTest++.h" #include "test_utils.hh" #include "npstat/stat/Distributions1D.hh" #include "npstat/stat/GaussianMixture1D.hh" #include "npstat/stat/JohnsonCurves.hh" #include "npstat/stat/TruncatedDistribution1D.hh" #include "npstat/stat/LeftCensoredDistribution.hh" #include "npstat/stat/RightCensoredDistribution.hh" #include "npstat/stat/QuantileTable1D.hh" #include "npstat/stat/DistributionMix1D.hh" #include "npstat/stat/VerticallyInterpolatedDistribution1D.hh" #include "npstat/stat/UGaussConvolution1D.hh" #include "npstat/stat/RatioOfNormals.hh" #include "npstat/stat/InterpolatedDistro1D1P.hh" #include "npstat/stat/TransformedDistribution1D.hh" #include "npstat/stat/EdgeworthSeries1D.hh" #include "npstat/stat/DeltaMixture1D.hh" #include "npstat/stat/distro1DStats.hh" #include "npstat/stat/IdentityTransform1D.hh" #include "npstat/stat/AsinhTransform1D.hh" #include "npstat/stat/LogRatioTransform1D.hh" #include "npstat/stat/LocationScaleTransform1.hh" #include "npstat/stat/Distribution1DReader.hh" #include "npstat/stat/DistributionTransform1DReader.hh" #include "npstat/stat/arrayStats.hh" #include "npstat/nm/EquidistantSequence.hh" #include "npstat/nm/LinearMapper1d.hh" #include "npstat/nm/ClassicalOrthoPolys1D.hh" #include "npstat/nm/MathUtils.hh" #include "npstat/nm/GaussHermiteQuadrature.hh" #include "npstat/rng/MersenneTwister.hh" #include "LogLogQuadratic1D.hh" #include "geners/pseudoIO.hh" #define LSQR2 1.41421356237309504880169L #define SQRTWOPIL 2.50662827463100050241577L using namespace npstat; using namespace std; struct Two : public npstat::ConstValue1 { inline Two() : npstat::ConstValue1(2.0) {} }; gs_enable_pseudo_io(Two) struct Three : public npstat::ConstValue1 { inline Three() : npstat::ConstValue1(3.0) {} }; gs_enable_pseudo_io(Three) struct Xsq { inline double operator()(const double x) const {return x*x;} inline bool operator==(const Xsq&) const {return true;} }; gs_enable_pseudo_io(Xsq) struct Xp1 { inline double operator()(const double x) const {return x + 1.0;} inline bool operator==(const Xp1&) const {return true;} }; gs_enable_pseudo_io(Xp1) static long double ldgexceedance(const long double x) { return erfcl(x/LSQR2)/2.0L; } static long double ldgcdf(const long double x) { return erfcl(-x/LSQR2)/2.0L; } namespace { double he0(const double x) { return 1.0; } double he1(const double x) { return x; } double he2(const double x) { return x*x - 1.0; } double he3(const double x) { return x*(x*x - 3.0); } double he4(const long double x) { const long double xsq = x*x; return 3 - 6*xsq + xsq*xsq; } double he5(const long double x) { const long double xsq = x*x; return x*(xsq*xsq - 10*xsq + 15); } double he6(const long double x) { const long double xsq = x*x; return -15 + 45*xsq - 15*xsq*xsq + xsq*xsq*xsq; } double he7(const long double x) { const long double xsq = x*x; return x*(-105 + 105*xsq - 21*xsq*xsq + xsq*xsq*xsq); } double he8(const long double x) { const long double xsq = x*x; const long double x4 = xsq*xsq; return 105 - 420*xsq + 210*x4 - 28*x4*xsq + x4*x4; } double he9(const long double x) { const long double xsq = x*x; const long double x4 = xsq*xsq; return x*(945 - 1260*xsq + 378*x4 - 36*xsq*x4 + x4*x4); } double he10(const long double x) { const long double xsq = x*x; const long double x4 = 
xsq*xsq; const long double x6 = x4*xsq; const long double x8 = x6*xsq; const long double x10 = x8*xsq; return -945 + 4725*xsq - 3150*x4 + 630*x6 - 45*x8 + x10; } double he11(const long double x) { const long double xsq = x*x; const long double x4 = xsq*xsq; const long double x6 = x4*xsq; const long double x8 = x6*xsq; const long double x10 = x8*xsq; return x*(-10395 + 17325*xsq - 6930*x4 + 990*x6 - 55*x8 + x10); } double he12(const long double x) { const long double xsq = x*x; const long double x4 = xsq*xsq; const long double x6 = x4*xsq; const long double x8 = x6*xsq; const long double x10 = x8*xsq; const long double x12 = x10*xsq; return 10395 - 62370*xsq + 51975*x4 - 13860*x6 + 1485*x8 - 66*x10 + x12; } void get_cumulants_severini(const EdgeworthSeries1D& s, double cumulants[8]) { assert(s.method() == EDGEWORTH_SEVERINI); double m[8]; GaussHermiteQuadrature quad(128); for (unsigned i=0; i<8; ++i) { EdgeworthSeriesMomentFcn fcn(s, i); m[i] = quad.integrateProb(0.0, 1.0, fcn); } for (unsigned i=0; i<4; ++i) cumulants[i] = m[i]; cumulants[4] = m[4] - 3*m[2]*m[2]; cumulants[5] = m[5] - 10*m[2]*m[3]; cumulants[6] = m[6] - 15*m[2]*m[4] - 10*m[3]*m[3] + 30*m[2]*m[2]*m[2]; cumulants[7] = m[7] + 210*m[2]*m[2]*m[3] - 35*m[3]*m[4] - 21*m[2]*m[5]; } void get_cumulants_classical(const EdgeworthSeries1D& s, double cumulants[8]) { assert(s.method() == EDGEWORTH_CLASSICAL); double m[8]; GaussHermiteQuadrature quad(128); for (unsigned i=0; i<8; ++i) { EdgeworthSeriesMomentFcn fcn(s, i); m[i] = quad.integrateProb(s.mean(), s.stdev(), fcn); } for (unsigned i=0; i<4; ++i) cumulants[i] = m[i]; cumulants[4] = m[4] - 3*m[2]*m[2]; cumulants[5] = m[5] - 10*m[2]*m[3]; cumulants[6] = m[6] - 15*m[2]*m[4] - 10*m[3]*m[3] + 30*m[2]*m[2]*m[2]; cumulants[7] = m[7] + 210*m[2]*m[2]*m[3] - 35*m[3]*m[4] - 21*m[2]*m[5]; } void io_test(const AbsDistribution1D& d) { std::ostringstream os; CHECK(d.classId().write(os)); CHECK(d.write(os)); std::istringstream is(os.str()); gs::ClassId id(is, 1); AbsDistribution1D* phoenix = AbsDistribution1D::read(id, is); CHECK(*phoenix == d); delete phoenix; } void standard_test(const AbsDistribution1D& d, const double range, const double eps, const bool do_io = true) { const double cdf0 = d.cdf(0.0); for (unsigned i=0; i<100; ++i) { const double r = 2*range*(test_rng() - 0.5); const double cdfr = d.cdf(r); if (cdfr > 0.0 && cdfr < 1.0) { CHECK_CLOSE(cdfr, 1.0 - d.exceedance(r), eps); CHECK_CLOSE(r, d.quantile(cdfr), eps); const double integ = simpson_integral( DensityFunctor1D(d), 0.0, r); CHECK_CLOSE(integ, cdfr-cdf0, eps); } } if (do_io) io_test(d); } void invcdf_test(const AbsDistribution1D& d, const double eps) { for (unsigned i=0; i<1000; ++i) { const double r = test_rng(); const double x = d.quantile(r); const double cdf = d.cdf(x); CHECK_CLOSE(cdf, r, eps); CHECK_CLOSE(cdf, 1.0 - d.exceedance(x), eps); } } void invexceedance_test(const EdgeworthSeries1D& d, const double eps) { for (unsigned i=0; i<1000; ++i) { const double r = test_rng(); const double x = d.inverseExceedance(r); const double exc = d.exceedance(x); CHECK_CLOSE(exc, r, eps); CHECK_CLOSE(exc, 1.0 - d.cdf(x), eps); } } TEST(Distributions1D_Gauss) { Gauss1D g(0., 1.); double value = simpson_integral(DensityFunctor1D(g), -6, 6); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(g, 6, 1.e-7); CHECK_CLOSE(7.61985302416e-24, g.cdf(-10.0), 1.e-34); CHECK_CLOSE(7.61985302416e-24, g.exceedance(10.0), 1.e-34); CHECK_CLOSE(2.75362411861e-89, g.cdf(-20.0), 1.e-99); CHECK_CLOSE(2.75362411861e-89, g.exceedance(20.0), 1.e-99); } 
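    // A minimal standalone sketch of the round-trip invariants that
    // standard_test() and invcdf_test() above exercise, written out for
    // a single Gauss1D. Only the AbsDistribution1D interface already used
    // in this file is assumed; the 1.e-8 tolerance is a choice made here,
    // not a library guarantee.
    TEST(Distributions1D_Gauss_roundtrip_sketch)
    {
        const Gauss1D g(0., 1.);
        for (unsigned i=0; i<100; ++i)
        {
            const double u = test_rng();       // u is uniform on (0, 1)
            const double x = g.quantile(u);    // quantile inverts the cdf
            CHECK_CLOSE(u, g.cdf(x), 1.e-8);
            // exceedance(x) plays the role of 1 - cdf(x), with better
            // numerical behavior in the upper tail
            CHECK_CLOSE(1.0 - u, g.exceedance(x), 1.e-8);
        }
    }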
TEST(Distributions1D_Bifgauss_0) { const double lfrac = 0.3; const double lsigma = lfrac*2.0; const double rsigma = 2.0 - lsigma; BifurcatedGauss1D g(0.0, 1.0, lfrac, 2.0, 1.5); double value = simpson_integral(DensityFunctor1D(g), -lsigma*g.nSigmasLeft(), rsigma*g.nSigmasRight()); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(g, lsigma*g.nSigmasLeft(), 1.e-7); } TEST(Distributions1D_Bifgauss_1) { const double eps = 1.0e-12; BifurcatedGauss1D g1(0.0, 2.0, 0.5, 2.0, 2.0); TruncatedGauss1D g2(0.0, 2.0, 2.0); for (unsigned i=0; i<100; ++i) { const double r = test_rng(); const double q1 = g1.quantile(r); const double q2 = g2.quantile(r); CHECK_CLOSE(q1, q2, eps); CHECK_CLOSE(g1.density(q1), g2.density(q1), eps); CHECK_CLOSE(g1.cdf(q1), g2.cdf(q1), eps); CHECK_CLOSE(g1.exceedance(q1), g2.exceedance(q1), eps); } } TEST(Distributions1D_Bifgauss_2) { BifurcatedGauss1D g(0.0, 1.0, 0.0, 2.0, 2.0); double value = simpson_integral(DensityFunctor1D(g), 0.0, 4.0); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(g, 4.0, 1.e-7); } TEST(Distributions1D_Bifgauss_3) { BifurcatedGauss1D g(0.0, 1.0, 1.0, 2.0, 2.0); double value = simpson_integral(DensityFunctor1D(g), -4.0, 0.0); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(g, 4.0, 1.e-7); IdentityTransform1D it(5.0); TransformedDistribution1D td(it, g); value = simpson_integral(DensityFunctor1D(td), -4.0, 0.0); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(td, 4.0, 1.e-7); } TEST(Distributions1D_uniform) { Uniform1D g(0., 1.); double value = simpson_integral(DensityFunctor1D(g), -0.1, 1.1); CHECK_CLOSE(1.0, value, 1.e-3); standard_test(g, 1, 1.e-7); } TEST(Distributions1D_exp) { Exponential1D g(0., 1.); standard_test(g, 20.0, 1.e-7); g.setScale(3.); standard_test(g, 60.0, 1.e-7); } TEST(Distributions1D_Pareto) { Pareto1D p1(0., 1., 1.5); double value = simpson_integral(DensityFunctor1D(p1), 1, 6); CHECK_CLOSE(0.93195861825602283, value, 1.e-8); invcdf_test(p1, 1.e-10); io_test(p1); } TEST(Distributions1D_MirroredGauss) { { MirroredGauss1D g(0., 1., 0.3, 0.1); const double value = simpson_integral(DensityFunctor1D(g), 0, 1); CHECK_CLOSE(1.0, value, 1.e-12); invcdf_test(g, 1.e-10); io_test(g); for (unsigned i=0; i<100; ++i) { const double y = test_rng(); const double cdfy = simpson_integral(DensityFunctor1D(g), 0, y); CHECK_CLOSE(cdfy, g.cdf(y), 1.0e-10); } } { MirroredGauss1D g(0., 2., 0.7, 0.1); const double value = simpson_integral(DensityFunctor1D(g), 0, 2); CHECK_CLOSE(1.0, value, 1.e-12); invcdf_test(g, 1.e-10); io_test(g); for (unsigned i=0; i<100; ++i) { const double y = 2.0*test_rng(); const double cdfy = simpson_integral(DensityFunctor1D(g), 0, y); CHECK_CLOSE(cdfy, g.cdf(y), 1.0e-10); } } } TEST(Distributions1D_Huber) { Huber1D h1(0., 1., 0.0); double value = simpson_integral(DensityFunctor1D(h1), -6, 6); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(h1, 6, 1.e-7); double tailW = 0.2; double sigma = 0.8; Huber1D h2(0., sigma, tailW); CHECK_EQUAL(tailW, h2.tailWeight()); value = simpson_integral(DensityFunctor1D(h2), -20, 20, 10000); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(h2, 6, 1.e-7); const double a = sigma*h2.tailStart(); value = simpson_integral(DensityFunctor1D(h2), -a, a, 10000); CHECK_CLOSE(1.0-tailW, value, 1.e-8); for (unsigned i=0; i<1000; ++i) { const double rndx = 6*a*(test_rng() - 0.5); const double cdf = h2.cdf(rndx); value = simpson_integral(DensityFunctor1D(h2), -20, rndx, 10000); CHECK_CLOSE(cdf, value, 1.e-8); CHECK_CLOSE(h2.quantile(cdf), rndx, 1.e-8); } } TEST(Distributions1D_Symbeta) { SymmetricBeta1D sb(0.0, 1.0, 3); double 
value = simpson_integral(DensityFunctor1D(sb), -1, 1); CHECK_CLOSE(1.0, value, 1.e-8); for (unsigned i=0; i<10; ++i) { SymmetricBeta1D sb(0.0, 2.0, i); standard_test(sb, 1.8, 1.e-8); } } TEST(Distributions1D_Moyal) { Moyal1D mo(0.0, 1.0); double value = simpson_integral(DensityFunctor1D(mo), -7, 1000, 100000); CHECK_CLOSE(1.0, value, 1.e-9); CHECK_CLOSE(0.24197072451914335, mo.density(0.0), 1.0e-14); CHECK_CLOSE(0.20131624406488799, mo.density(1.0), 1.0e-14); CHECK_CLOSE(0.31731050786291410, mo.cdf(0.0), 1.0e-12); CHECK_CLOSE(0.54416242936230305, mo.cdf(1.0), 1.0e-12); invcdf_test(mo, 1.e-8); io_test(mo); } TEST(Distributions1D_StudentsT1D) { for (unsigned i=1; i<10; ++i) { StudentsT1D t(0.0, 1.0, i); invcdf_test(t, 1.e-8); } } TEST(Distributions1D_beta) { Beta1D b(0.0, 1.0, 3.0, 4.0); double value = simpson_integral(DensityFunctor1D(b), 0, 1); CHECK_CLOSE(1.0, value, 1.e-8); for (unsigned i=1; i<5; ++i) for (unsigned j=1; j<5; ++j) { Beta1D sb(0.5, 2.0, i, j); standard_test(sb, 0.9, 1.e-3); } } TEST(Distributions1D_gamma) { Gamma1D g1(0., 0.8, 1.0); standard_test(g1, 20.0, 5.e-3); Gamma1D g2(0., 0.9, 2.0); standard_test(g2, 20.0, 2.e-5); Gamma1D g3(0., 1.1, 3.0); standard_test(g3, 20.0, 2.e-5); } TEST(UGaussConvolution1D) { UGaussConvolution1D ug(0, 1, -1, 1); standard_test(ug, 6.0, 1.0e-6); } TEST(Distributions1D_Transformed) { StaticDistributionTransform1DReader::registerClass< LocationScaleTransform1 >(); const double teps = 1.0e-12; Two two; Three three; LocationScaleTransform1 tr(two, three); Gauss1D g(0.0, 1.0); TransformedDistribution1D d1(tr, g); io_test(d1); Gauss1D g1(2.0, 3.0); for (unsigned i=0; i<100; ++i) { const double x = 10.0*(test_rng() - 0.5); CHECK_CLOSE(g1.density(x), d1.density(x), teps); CHECK_CLOSE(g1.cdf(x), d1.cdf(x), teps); CHECK_CLOSE(g1.exceedance(x), d1.exceedance(x), teps); const double y = test_rng(); CHECK_CLOSE(g1.quantile(y), d1.quantile(y), teps); } } TEST(Distributions1D_JohnsonSu) { const double teps = 1.0e-12; JohnsonSu jsu1(0.0, 1.0, 1.0, 10.0); standard_test(jsu1, 6, 1.e-8); AsinhTransform1D tr1(jsu1.getDelta(), jsu1.getLambda(), jsu1.getGamma(), jsu1.getXi()); Gauss1D g(0.0, 1.0); TransformedDistribution1D d1(tr1, g); standard_test(d1, 6, 1.e-8); for (unsigned i=0; i<100; ++i) { const double x = 10.0*(test_rng() - 0.5); CHECK_CLOSE(jsu1.density(x), d1.density(x), teps); CHECK_CLOSE(jsu1.cdf(x), d1.cdf(x), teps); CHECK_CLOSE(jsu1.exceedance(x), d1.exceedance(x), teps); const double y = test_rng(); CHECK_CLOSE(jsu1.quantile(y), d1.quantile(y), teps); } JohnsonSu jsu2(0.1, 1.0, -1.0, 10.0); standard_test(jsu2, 6, 1.e-8); AsinhTransform1D tr2(jsu2.getDelta(), jsu2.getLambda(), jsu2.getGamma(), jsu2.getXi()); TransformedDistribution1D d2(tr2, g); standard_test(d2, 6, 1.e-8); double mean, sigma, skew, kurt; distro1DStats(jsu2, -30.0, 20.0, 500000, &mean, &sigma, &skew, &kurt); CHECK_CLOSE(0.1, mean, 1.0e-5); CHECK_CLOSE(1.0, sigma, 1.0e-4); CHECK_CLOSE(-1.0, skew, 0.01); CHECK_CLOSE(10.0, kurt, 0.2); } TEST(Distributions1D_JohnsonSb) { const double teps = 1.0e-12; JohnsonSb jsb1(0.0, 1.0, 1.0, 2.9); LogRatioTransform1D tr1(jsb1.getDelta(), jsb1.getLambda(), jsb1.getGamma(), jsb1.getXi()); Gauss1D g(0.0, 1.0); TransformedDistribution1D d1(tr1, g); const double lolim = jsb1.getXi(); const double range = jsb1.getLambda(); for (unsigned i=0; i<100; ++i) { const double x = lolim + range*test_rng(); CHECK_CLOSE(jsb1.density(x), d1.density(x), teps); CHECK_CLOSE(jsb1.cdf(x), d1.cdf(x), teps); CHECK_CLOSE(jsb1.exceedance(x), d1.exceedance(x), teps); const 
double y = test_rng(); CHECK_CLOSE(jsb1.quantile(y), d1.quantile(y), teps); } } TEST(Distributions1D_LogNormal0) { LogNormal g(0., 1., 0.); double value = simpson_integral(DensityFunctor1D(g), -6, 6); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(g, 6, 1.e-6); } TEST(Distributions1D_LogNormal1) { LogNormal ln1(0., 1., 2.0); double value = simpson_integral(DensityFunctor1D(ln1), -6, 20); CHECK_CLOSE(1.0, value, 1.e-6); standard_test(ln1, 7, 2.e-4); } TEST(Distributions1D_LogNormal2) { LogNormal ln2(0., 1., -2.0); double value = simpson_integral(DensityFunctor1D(ln2), -20, 6); CHECK_CLOSE(1.0, value, 1.e-6); standard_test(ln2, 7, 4.e-4); } TEST(Distributions1D_LogNormal3) { LogNormal ln3(-0.5, 1., 2.0); standard_test(ln3, 7, 1.e-3); } TEST(Distributions1D_LogNormal4) { LogNormal ln4(-0.5, 1., -2.0); standard_test(ln4, 7, 1.e-3); } TEST(Distributions1D_LogNormal5) { LogNormal ln5(0.5, 2., 2.0); standard_test(ln5, 7, 1.e-3); } TEST(Distributions1D_LogNormal6) { LogNormal ln6(0.5, 2., -2.0); standard_test(ln6, 7, 1.e-3); } TEST(Distributions1D_LogNormal7) { LogNormal ln7(1., 2., 10.0); standard_test(ln7, 7, 1.e-3); } TEST(Distributions1D_LogNormal8) { LogNormal ln8(1., 2., -10.0); standard_test(ln8, 7, 1.e-3); } TEST(Distributions1D_misc) { Cauchy1D ch(0.0, 0.5); standard_test(ch, 5, 1.e-8); TruncatedGauss1D tg(0.0, 2.0, 3.0); standard_test(tg, 6.0, 1.e-8); Gauss1D gauss(0.0, 2.0); TruncatedDistribution1D td(gauss, -6.0, 6.0); standard_test(td, 6.0, 1.e-8); CHECK_CLOSE(td.density(0.5), tg.density(0.5), 1.0e-14); CHECK_CLOSE(td.cdf(0.5), tg.cdf(0.5), 1.0e-14); CHECK_CLOSE(td.quantile(0.3), tg.quantile(0.3), 1.0e-14); CHECK_CLOSE(td.exceedance(0.3), tg.exceedance(0.3), 1.0e-14); } TEST(EdgeworthSeries1D_io) { const unsigned order = 2; std::vector cumulants(4); for (unsigned i=0; i dummy; double empCum[8]; EdgeworthSeries1D es1(dummy, EDGEWORTH_SEVERINI, order, false); CHECK_CLOSE(0.241970724519143, es1.density(1.0), 1.e-14); CHECK_CLOSE(7.61985302416e-24, es1.cdf(-10.0), 1.e-34); CHECK_CLOSE(7.61985302416e-24, es1.exceedance(10.0), 1.e-34); standard_test(es1, 6, 1.e-8); invexceedance_test(es1, 1.e-8); get_cumulants_severini(es1, empCum); CHECK_CLOSE(1.0, empCum[0], 1.e-14); CHECK_CLOSE(0.0, empCum[1], 1.e-14); CHECK_CLOSE(1.0, empCum[2], 1.e-14); CHECK_CLOSE(0.0, empCum[3], 1.e-14); CHECK_CLOSE(0.0, empCum[4], 1.e-14); EdgeworthSeries1D es2(dummy, EDGEWORTH_SEVERINI, order, true); CHECK_CLOSE(0.241970724519143, es2.density(1.0), 1.e-14); CHECK_CLOSE(7.61985302416e-24, es2.cdf(-10.0), 1.e-34); CHECK_CLOSE(7.61985302416e-24, es2.exceedance(10.0), 1.e-34); standard_test(es2, 6, 1.e-8); invexceedance_test(es2, 1.e-8); get_cumulants_severini(es2, empCum); CHECK_CLOSE(1.0, empCum[0], 1.e-14); CHECK_CLOSE(0.0, empCum[1], 1.e-14); CHECK_CLOSE(1.0, empCum[2], 1.e-14); CHECK_CLOSE(0.0, empCum[3], 1.e-14); CHECK_CLOSE(0.0, empCum[4], 1.e-14); EdgeworthSeries1D es3(dummy, EDGEWORTH_CLASSICAL, order, false); CHECK_CLOSE(0.241970724519143, es3.density(1.0), 1.e-14); CHECK_CLOSE(7.61985302416e-24, es3.cdf(-10.0), 1.e-34); CHECK_CLOSE(7.61985302416e-24, es3.exceedance(10.0), 1.e-34); standard_test(es3, 6, 1.e-8); invexceedance_test(es3, 1.e-8); get_cumulants_classical(es3, empCum); CHECK_CLOSE(1.0, empCum[0], 1.e-14); CHECK_CLOSE(0.0, empCum[1], 1.e-14); CHECK_CLOSE(1.0, empCum[2], 1.e-14); CHECK_CLOSE(0.0, empCum[3], 1.e-14); CHECK_CLOSE(0.0, empCum[4], 1.e-14); EdgeworthSeries1D es4(dummy, EDGEWORTH_CLASSICAL, order, true); CHECK_CLOSE(0.241970724519143, es4.density(1.0), 1.e-14); 
CHECK_CLOSE(7.61985302416e-24, es4.cdf(-10.0), 1.e-34); CHECK_CLOSE(7.61985302416e-24, es4.exceedance(10.0), 1.e-34); standard_test(es4, 6, 1.e-8); invexceedance_test(es4, 1.e-8); get_cumulants_classical(es4, empCum); CHECK_CLOSE(1.0, empCum[0], 1.e-14); CHECK_CLOSE(0.0, empCum[1], 1.e-14); CHECK_CLOSE(1.0, empCum[2], 1.e-14); CHECK_CLOSE(0.0, empCum[3], 1.e-14); CHECK_CLOSE(0.0, empCum[4], 1.e-14); } TEST(EdgeworthSeries1D_1) { const double range = 5.0; const unsigned ntries = 10; const unsigned order = 1; const double k1 = 3, k2 = 2, k3 = 0.5; double empCum[8]; std::vector c_slr(1); c_slr[0] = k1; std::vector c_gen(3); c_gen[0] = k1; c_gen[1] = k2; c_gen[2] = k3; EdgeworthSeries1D es1(c_slr, EDGEWORTH_SEVERINI, order, true); standard_test(es1, 6, 1.e-8); invexceedance_test(es1, 1.e-8); for (unsigned i=0; i c_slr(2); c_slr[0] = k1; c_slr[1] = k2; std::vector c_gen(4); c_gen[0] = k1; c_gen[1] = k2; c_gen[2] = k3; c_gen[3] = k4; EdgeworthSeries1D es1(c_slr, EDGEWORTH_SEVERINI, order, true); standard_test(es1, 6, 1.e-8); for (unsigned i=0; i c_slr(3); c_slr[0] = k1; c_slr[1] = k2; c_slr[2] = k3; std::vector c_gen(5); c_gen[0] = k1; c_gen[1] = k2; c_gen[2] = k3; c_gen[3] = k4; c_gen[4] = k5; EdgeworthSeries1D es1(c_slr, EDGEWORTH_SEVERINI, order, true); standard_test(es1, 3, 1.e-8); for (unsigned i=0; i c_slr(4); c_slr[0] = k1; c_slr[1] = k2; c_slr[2] = k3; c_slr[3] = k4; std::vector c_gen(6); c_gen[0] = k1; c_gen[1] = k2; c_gen[2] = k3; c_gen[3] = k4; c_gen[4] = k5; c_gen[5] = k6; EdgeworthSeries1D es1(c_slr, EDGEWORTH_SEVERINI, order, true); standard_test(es1, 3, 1.e-8); invexceedance_test(es1, 1.e-8); for (unsigned i=0; i quantiles(nquant); arrayQuantiles1D(data, sizeof(data)/sizeof(data[0]), 0.0, 1.0, &qs[0], &quantiles[0], nquant); BinnedDensity1D t1(location, width, data, sizeof(data)/sizeof(data[0]), 0); for (unsigned i=0; i(); LogLogQuadratic1D q1(1.0, 10.0, 0.5, 1.0); standard_test(q1, 1.0, 1.e-9); invcdf_test(q1, 1.e-14); } TEST(GaussianMixture1D_1) { std::vector entries; entries.push_back(GaussianMixtureEntry(3, 0, 1)); GaussianMixture1D g(0, 1, &entries[0], entries.size()); double value = simpson_integral(DensityFunctor1D(g), -6, 6); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(g, 6, 1.e-7); Gauss1D g2(0, 1); DistributionMix1D mix; mix.add(g2, 3.0); value = simpson_integral(DensityFunctor1D(mix), -6, 6); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(mix, 6, 1.e-7); VerticallyInterpolatedDistribution1D shmix(1U); shmix.add(g2, 3.0); value = simpson_integral(DensityFunctor1D(shmix), -6, 6); CHECK_CLOSE(1.0, value, 1.e-8); standard_test(shmix, 6, 1.e-7, false); for (unsigned i=0; i<100; ++i) { const double r = test_rng(); CHECK_CLOSE(shmix.quantile(r), mix.quantile(r), 1.e-15); } } TEST(GaussianMixture1D_2) { for (unsigned i=0; i<100; ++i) { Gauss1D g(test_rng(), 0.1+test_rng()); GaussianMixture1D mix(g); for (unsigned j=0; j<100; ++j) { const double x = -3 + 6*test_rng(); CHECK_CLOSE(g.density(x), mix.density(x), 1.0e-15); CHECK_CLOSE(g.cdf(x), mix.cdf(x), 1.0e-15); } } } TEST(DistributionMix1D_2) { for (unsigned i=0; i<100; ++i) { Gauss1D g(test_rng(), 0.1+test_rng()); DistributionMix1D mix; VerticallyInterpolatedDistribution1D shmix; mix.add(g, 1); shmix.add(g, 1); for (unsigned j=0; j<100; ++j) { const double x = -3 + 6*test_rng(); CHECK_CLOSE(g.density(x), mix.density(x), 1.0e-15); CHECK_CLOSE(g.cdf(x), mix.cdf(x), 1.0e-15); CHECK_CLOSE(g.density(x), shmix.density(x), 1.0e-15); CHECK_CLOSE(g.cdf(x), shmix.cdf(x), 1.0e-15); } } } TEST(GaussianMixture1D_3) { std::vector entries; 
entries.push_back(GaussianMixtureEntry(3, 0, 1)); entries.push_back(GaussianMixtureEntry(2, -0.5, 1.5)); entries.push_back(GaussianMixtureEntry(1, 0.5, 0.5)); GaussianMixture1D g(0, 1, &entries[0], entries.size()); double value = simpson_integral(DensityFunctor1D(g), -8, 8); CHECK_CLOSE(1.0, value, 1.e-7); standard_test(g, 4, 1.e-7); } TEST(DistributionMix1D_3) { DistributionMix1D mix; VerticallyInterpolatedDistribution1D shmix(3); Gauss1D g1(0, 1), g2(-0.5, 1.5), g3(0.5, 0.5); mix.add(g1, 3); mix.add(g2, 2); mix.add(g3, 1); shmix.add(g1, 3); shmix.add(g2, 2); shmix.add(g3, 1); double value = simpson_integral(DensityFunctor1D(mix), -8, 8); CHECK_CLOSE(1.0, value, 1.e-7); value = simpson_integral(DensityFunctor1D(shmix), -8, 8); CHECK_CLOSE(1.0, value, 1.e-7); standard_test(mix, 4, 1.e-7); standard_test(shmix, 4, 1.e-7, false); } TEST(GaussianMixture1D_4) { const double m0 = 1.3; const double s0 = 3/2.0; std::vector entries; entries.push_back(GaussianMixtureEntry(3, 0, 1)); entries.push_back(GaussianMixtureEntry(2, -0.5, 1.5)); entries.push_back(GaussianMixtureEntry(1, 0.5, 0.5)); GaussianMixture1D g(m0, s0, &entries[0], entries.size()); // G[x_, m_, s_] := 1/Sqrt[2 Pi]/s Exp[-(x - m)^2/s^2/2] // w1 = 3/6; m1 = 0; s1 = 1 // w2 = 2/6; m2 = -1/2; s2 = 3/2 // w3 = 1/6; m3 = 1/2; s3 = 1/2 // m0 = 1 + 3/10 // s0 = 3/2 // f = (w1*G[(x-m0)/s0,m1,s1]+w2*G[(x-m0)/s0,m2,s2]+w3*G[(x-m0)/s0,m3,s3])/s0 // mean = Integrate[x f, {x, -Infinity, Infinity}] // stdev = Sqrt[Integrate[(x - mean)^2 f, {x, -Infinity, Infinity}]] // q0 = Integrate[f, {x, -Infinity, 5/2}] // N[q0, 20] const double expmean = 47.0/40; const double expstdev = sqrt(203.0)/8.0; const double expcdf = 0.78400933536932041748; CHECK_CLOSE(expmean, g.mean(), 1.0e-15); CHECK_CLOSE(expstdev, g.stdev(), 1.0e-15); CHECK_CLOSE(expcdf, g.cdf(2.5), 1.0e-15); CHECK_CLOSE(2.5, g.quantile(expcdf), 1.0e-12); } TEST(LeftCensoredDistribution) { const double cutoff = 0.1234; const double eps = 1.0e-9; const double infty = -100.0; Gauss1D gauss(0.0, 1.0); TruncatedDistribution1D td(gauss, cutoff, 100.0); const double frac = gauss.exceedance(cutoff); LeftCensoredDistribution lc(td, frac, infty); CHECK_CLOSE(lc.cdf(cutoff-1.0), 1.0-frac, eps); CHECK_CLOSE(lc.exceedance(cutoff-1.0), frac, eps); CHECK_EQUAL(lc.cdf(infty-10.0), 0.0); CHECK_EQUAL(lc.exceedance(infty-10.0), 1.0); for (unsigned i=0; i<1000; ++i) { const double r = cutoff + test_rng()*5.0; CHECK_CLOSE(gauss.density(r), lc.density(r), eps); CHECK_CLOSE(gauss.exceedance(r), lc.exceedance(r), eps); CHECK_CLOSE(gauss.cdf(r), lc.cdf(r), eps); CHECK_CLOSE(lc.quantile(lc.cdf(r)), r, eps); } io_test(lc); } TEST(RightCensoredDistribution) { const double cutoff = 0.1234; const double eps = 1.0e-10; const double infty = 100.0; Gauss1D gauss(0.0, 1.0); TruncatedDistribution1D td(gauss, -100.0, cutoff); const double frac = gauss.cdf(cutoff); RightCensoredDistribution rc(td, frac, infty); CHECK_CLOSE(rc.cdf(cutoff+1.0), frac, eps); CHECK_CLOSE(rc.exceedance(cutoff+1.0), 1.0-frac, eps); CHECK_EQUAL(rc.cdf(infty+10.0), 1.0); CHECK_EQUAL(rc.exceedance(infty+10.0), 0.0); for (unsigned i=0; i<1000; ++i) { const double r = cutoff - test_rng()*5.0; CHECK_CLOSE(gauss.density(r), rc.density(r), eps); CHECK_CLOSE(gauss.exceedance(r), rc.exceedance(r), eps); CHECK_CLOSE(gauss.cdf(r), rc.cdf(r), eps); CHECK_CLOSE(rc.quantile(rc.cdf(r)), r, eps); } io_test(rc); } TEST(RatioOfNormals) { RatioOfNormals r1(-2.0, 1.0, 0.5, 1.0, 0.7); standard_test(r1, 10, 1.e-8); RatioOfNormals r2(1.0, 3.0, 2.0, 1.0, 0.5); standard_test(r2, 10, 
1.e-7);

        const double x = 1.0;
        const double cdf = r2.cdf(x);
        const double quantile = r2.quantile(cdf);
        CHECK_CLOSE(x, quantile, 1.0e-8);
    }

    TEST(InterpolatedDistro1D1P)
    {
        const double eps = 1.0e-8;
        const double gridPoints[] = {1.0, 2.0};
        double param = 1.3;
        const unsigned nGridPoints = sizeof(gridPoints)/sizeof(gridPoints[0]);

        GridAxis ax(std::vector<double>(gridPoints, gridPoints+nGridPoints));
        std::vector<CPP11_shared_ptr<AbsDistribution1D> > distros;
        distros.push_back(CPP11_shared_ptr<AbsDistribution1D>(new Gauss1D(1.0, 2.0)));
        distros.push_back(CPP11_shared_ptr<AbsDistribution1D>(new Gauss1D(-1.0, 1.0)));
        InterpolatedDistro1D1P idist(ax, distros, param);
        standard_test(idist, 10.0, 1.e-7);

        LinearMapper1d meanmap(gridPoints[0], 1.0, gridPoints[1], -1.0);
        LinearMapper1d sigmap(gridPoints[0], 2.0, gridPoints[1], 1.0);
        for (unsigned i=0; i<100; ++i)
        {
            param = 1.0 + test_rng();
            idist.setParameter(param);
            Gauss1D g(meanmap(param), sigmap(param));
            const double r = test_rng();
            const double x = g.quantile(r);
            CHECK_CLOSE(x, idist.quantile(r), eps);
            CHECK_CLOSE(g.density(x), idist.density(x), eps);
            CHECK_CLOSE(g.cdf(x), idist.cdf(x), eps);
        }
    }

    TEST(distro1DStats_1)
    {
        const double eps = 1.0e-7;
        for (unsigned i=0; i<100; ++i)
        {
            const double mean = test_rng();
            const double sigma = 0.5 + test_rng();
            double m, s, skew, kurt;
            distro1DStats(Gauss1D(mean, sigma), -10., 11., 100000,
                          &m, &s, &skew, &kurt);
            CHECK_CLOSE(mean, m, eps);
            CHECK_CLOSE(sigma, s, eps);
            CHECK_CLOSE(0.0, skew, eps);
            CHECK_CLOSE(3.0, kurt, eps);
        }
    }

    TEST(DeltaMixture1D_1)
    {
        const double eps = 1.0e-12;
        const unsigned nPoints = 3;
        double coords[nPoints] = {0., 1., 2.};
        double weights[nPoints] = {1., 3., 2.};
        DeltaMixture1D dmix(5.0, 1.0, coords, weights, nPoints);
        io_test(dmix);

        CHECK_CLOSE(0.6666666666666666, dmix.cdf(6.0), eps);
        CHECK_CLOSE(0.3333333333333333, dmix.exceedance(6.0), eps);

        MersenneTwister rng;
        int counts[nPoints] = {0};
        for (unsigned i=0; i<6000; ++i)
        {
            double rnd;
            dmix.random(rng, &rnd);
            CHECK(rnd == 5.0 || rnd == 6.0 || rnd == 7.0);
            if (rnd == 5.0)
                ++counts[0];
            if (rnd == 6.0)
                ++counts[1];
            if (rnd == 7.0)
                ++counts[2];
        }
        CHECK(std::abs(counts[0] - 1000) < 200);
        CHECK(std::abs(counts[2] - 2000) < 300);

        CHECK_CLOSE(0.5, dmix.integral(6.0, true, 6.0, true), eps);
        CHECK_CLOSE(0.0, dmix.integral(6.0, false, 6.0, false), eps);
        CHECK_CLOSE(0.0, dmix.integral(6.0, false, 6.0, true), eps);
        CHECK_CLOSE(0.0, dmix.integral(6.0, true, 6.0, false), eps);

        CHECK_CLOSE(1.0, dmix.integral(5.0, true, 7.0, true), eps);
        CHECK_CLOSE(0.5, dmix.integral(5.0, false, 7.0, false), eps);
        CHECK_CLOSE(0.8333333333333334, dmix.integral(5.0, false, 7.0, true), eps);
        CHECK_CLOSE(0.6666666666666667, dmix.integral(5.0, true, 7.0, false), eps);

        const double expectedMoments[4] = {1.0, 37/6.0, 77/2.0, 1459/6.0};
        long double moments[4];
        dmix.moments(0, 3, moments);
        for (unsigned i=0; i<=3; ++i)
            CHECK_CLOSE(expectedMoments[i], moments[i], eps);

        const double expectedCMoments[4] = {1.0, 37/6.0, 17/36.0, -2/27.0};
        dmix.centralMoments(3, moments);
        for (unsigned i=0; i<=3; ++i)
            CHECK_CLOSE(expectedCMoments[i], moments[i], eps);
    }
+
+    TEST(DeltaMixture1D_2)
+    {
+        const double eps = 1.0e-12;
+        const unsigned nPoints = 3;
+        double coords[nPoints] = {0., 1., 2.};
+        double weights[nPoints] = {1., 3., 2.};
+        DeltaMixture1D dmix(5.0, 2.0, coords, weights, nPoints);
+        io_test(dmix);
+
+        CHECK_CLOSE(0.6666666666666666, dmix.cdf(7.0), eps);
+        CHECK_CLOSE(0.3333333333333333, dmix.exceedance(7.0), eps);
+
+        MersenneTwister rng;
+        int counts[nPoints] = {0};
+        for (unsigned i=0; i<6000; ++i)
+        {
+            double rnd;
+            dmix.random(rng, &rnd);
+            CHECK(rnd == 5.0 || rnd == 7.0 || rnd == 9.0);
+            if (rnd == 5.0)
+                ++counts[0];
+            if (rnd == 7.0)
+                ++counts[1];
+            if (rnd == 9.0)
+                ++counts[2];
+        }
+        CHECK(std::abs(counts[0] - 1000) < 200);
+        CHECK(std::abs(counts[2] - 2000) < 300);
+
+        CHECK_CLOSE(0.5, dmix.integral(7.0, true, 7.0, true), eps);
+        CHECK_CLOSE(0.0, dmix.integral(7.0, false, 7.0, false), eps);
+        CHECK_CLOSE(0.0, dmix.integral(7.0, false, 7.0, true), eps);
+        CHECK_CLOSE(0.0, dmix.integral(7.0, true, 7.0, false), eps);
+
+        CHECK_CLOSE(1.0, dmix.integral(5.0, true, 9.0, true), eps);
+        CHECK_CLOSE(0.5, dmix.integral(5.0, false, 9.0, false), eps);
+        CHECK_CLOSE(0.8333333333333334, dmix.integral(5.0, false, 9.0, true), eps);
+        CHECK_CLOSE(0.6666666666666667, dmix.integral(5.0, true, 9.0, false), eps);
+
+        const double expectedMoments[4] = {1.0, 22/3.0, 167/3.0, 1306/3.0};
+        long double moments[4];
+        dmix.moments(0, 3, moments);
+        for (unsigned i=0; i<=3; ++i)
+            CHECK_CLOSE(expectedMoments[i], moments[i], eps);
+
+        const double expectedCMoments[4] = {1.0, 22/3.0, 17/9.0, -16/27.0};
+        dmix.centralMoments(3, moments);
+        for (unsigned i=0; i<=3; ++i)
+            CHECK_CLOSE(expectedCMoments[i], moments[i], eps);
+    }
}

Index: trunk/npstat/stat/00README.txt
===================================================================
--- trunk/npstat/stat/00README.txt	(revision 595)
+++ trunk/npstat/stat/00README.txt	(revision 596)
@@ -1,716 +1,716 @@

The code in this directory can depend on headers from the "nm" and "rng"
directories in the "npstat" package.

The classes implemented in this directory can be split into several
subgroups by their purpose:

* Data representation
* Descriptive statistics
* Statistical distributions
* Fitting of parametric models
* Statistical testing
* Local filtering
* Nonparametric density estimation
* Deconvolution density estimation (Unfolding)
* Nonparametric regression
* Interpolation of statistical distributions
* Miscellaneous data analysis techniques
* Convenience API
* Utilities
* I/O

Data representation
-------------------

AbsNtuple.hh -- interface class for ntuples. Implemented in
InMemoryNtuple.hh, ArchivedNtuple.hh. To be used by application code.

NtHistoFill.hh, NtNtupleFill.hh, NtRectangularCut.hh -- convenience classes
and functors for use inside "cycleOverRows", "conditionalCycleOverRows",
and other similar ntuple methods.

Column.hh -- helper class for AbsNtuple.hh. Can be used to refer to ntuple
columns either by name or by column number. Used by AbsNtuple.hh, and
should not be used separately.

ArchivedNtuple.hh -- class for storing huge ntuples which do not fit in the
computer memory. To be used by application code.

HistoAxis.hh -- representation of a histogram axis with equidistant bins.

NUHistoAxis.hh -- representation of a histogram axis with non-uniform bins.

DualHistoAxis.hh -- histogram axis which works reasonably well for both
uniform and non-uniform bins.

convertAxis.hh -- conversion functions between histogram and grid axes.

HistoND.hh -- arbitrary-dimensional histogram template.

interpolateHistoND.hh -- interpolation of histogram data to coordinate
values between bin centers.

mergeTwoHistos.hh -- a utility for smooth blending of two histograms.

ProductSymmetricBetaNDCdf.hh -- an interpolation function for blending
histograms.

InMemoryNtuple.hh -- class for storing small ntuples which completely fit
in the computer memory. Works faster than ArchivedNtuple.

NtupleBuffer.hh -- data buffer for data exchange between ArchivedNtuple
and disk-based archive.
OrderedPointND.hh -- a multidimensional point which can be sorted according
to multiple sorting criteria.

StorableMultivariateFunctor.hh -- Base class for storable multivariate
functors.

StorableHistoNDFunctor.hh -- Adaptation that represents HistoND as
a StorableMultivariateFunctor.

HistoNDFunctorInstances.hh -- A number of concrete typedefs for
StorableHistoNDFunctor template.

StorableInterpolationFunctor.hh -- Adaptation that represents
LinInterpolatedTableND as a StorableMultivariateFunctor.

InterpolationFunctorInstances.hh -- A number of concrete typedefs for
StorableInterpolationFunctor template.

Descriptive statistics
----------------------

ArrayProjectors.hh -- helper classes for making lower-dimensional
projections of the ArrayND class. Calculate various statistics over
projected dimensions (mean, median, etc).

arrayStats.hh -- means, standard deviations, and covariance matrices of
multivariate arrays. Skewness and kurtosis for 1-d arrays.

statUncertainties.hh -- uncertainties of various sample statistics.

histoStats.hh -- means and covariance matrices for histograms.

kendallsTau.hh -- Kendall's rank correlation coefficient, from a sample of
points or from copula.

spearmansRho.hh -- Spearman's rank correlation coefficient, from a sample
of points or from copula.

logLikelihoodPeak.hh -- summary of a 1-d log-likelihood curve.

MultivariateSumAccumulator.hh           -- classes in these files are
MultivariateSumsqAccumulator.hh            intended for calculating
MultivariateWeightedSumAccumulator.hh      covariance and correlation
MultivariateWeightedSumsqAccumulator.hh    matrices of multivariate data
                                           in a numerically stable manner.
These classes can serve as appropriate accumulator functors in various
"cycleOverRows" methods of AbsNtuple or as separate tools.

SampleAccumulator.hh -- accumulator of items for the purpose of calculating
statistical summaries. For use inside histograms, etc.

WeightedSampleAccumulator.hh -- similar class for use with weighted items.

CircularBuffer.hh -- accumulator of items with fixed length. When this
length is exceeded, the oldest items are discarded. Can calculate
statistical summaries (mean, stdev) that do not require element sorting.

StatAccumulator.hh -- simple, single-pass calculator of mean and standard
deviation. Updates a running average, so it works a bit better than
a "naive" version.

StatAccumulatorPair.hh -- simple, single-pass calculator of mean, standard
deviation, and covariance for two variables.

StatAccumulatorArr.hh -- simple, single-pass calculator of mean and
standard deviation for array sets.

CrossCovarianceAccumulator.hh -- single-pass calculator of covariances and
correlations for samples with elements represented by arrays.

WeightedStatAccumulator.hh -- single-pass calculator of mean and standard
deviation for weighted points.

WeightedStatAccumulatorPair.hh -- simple, single-pass calculator of mean,
standard deviation, and covariance for weighted points.

BinSummary.hh -- a five-number summary for SampleAccumulator,
StatAccumulator, WeightedSampleAccumulator, and WeightedStatAccumulator
which can be used for making box plots, etc. Allows for direct manipulation
of the center value, upper and lower ranges, and min/max value.

Statistical distributions
-------------------------

AbsDistribution1D.hh -- interface classes for 1-d parametric and tabulated
statistical distributions.

AbsDiscreteDistribution1D.hh -- interface classes for 1-d discrete
statistical distributions. Implemented in DiscreteDistributions1D.hh.
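For orientation, here is a minimal sketch of the common AbsDistribution1D
interface, with Gauss1D standing in for any of the concrete distributions
listed below (the calls shown are the ones exercised by the package's own
unit tests):

    #include "npstat/stat/Distributions1D.hh"
    #include "npstat/stat/TruncatedDistribution1D.hh"

    using namespace npstat;

    Gauss1D g(0.0, 1.0);                      // location 0, scale 1
    const double d = g.density(1.0);          // probability density
    const double p = g.cdf(1.0);              // cumulative distribution
    const double e = g.exceedance(1.0);       // 1 - cdf, computed accurately
    const double x = g.quantile(p);           // inverse cdf: x is close to 1.0
    TruncatedDistribution1D t(g, -2.0, 2.0);  // same shape, support cut to [-2, 2]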
AbsDistributionTransform1D.hh -- interface class for 1-d coordinate
transforms intended for subsequent use in constructing distributions
(in particular, with the TransformedDistribution1D class).

AsinhTransform1D.hh -- asinh transform (just like the one used to generate
Johnson's S_U curves).

SinhAsinhTransform1D.hh -- transform y = sinh(a + b*asinh(x)).

LogRatioTransform1D.hh -- log(y/(1-y)) transform with scale and location
adjustment (just like the one used to generate Johnson's S_B curves).

LogTransform1D.hh -- transform y = log(1 + x).

AbsDistributionND.hh -- interface classes for multivariate statistical
distributions.

CompositeDistribution1D.hh -- represents univariate statistical
distributions whose cumulative distribution functions F(x) can be built by
composition of two other cumulative distribution functions: F(x) = G(H(x)).

CompositeDistributionND.hh -- represents multivariate statistical
distributions decomposed into copula and marginals.

Copulas.hh -- distributions which are copulas.

CompositeDistros1D.hh -- several implementations of
CompositeDistribution1D.hh using concrete distributions.

-DeltaSequence1D.hh -- a mixture of Dirac delta functions.
+DeltaMixture1D.hh -- a mixture of Dirac delta functions.

Distributions1D.hh -- a number of continuous 1-d statistical distributions.

DistributionsND.hh -- a number of continuous multivariate statistical
distributions.

DiscreteDistributions1D.hh -- several discrete statistical distributions,
including Poisson and tabulated.

DistributionMix1D.hh -- Mixtures of one-dimensional statistical
distributions.

distro1DStats.hh -- a utility for empirical calculation of distribution
mean, standard deviation, skewness, and kurtosis.

EdgeworthSeries1D.hh -- Edgeworth series.

EdgeworthSeriesMethod.hh -- Helper class for defining Edgeworth series.

GaussianMixture1D.hh -- One-dimensional Gaussian mixtures.

GridRandomizer.hh -- class which knows how to map the unit hypercube into
a distribution whose density is represented on an n-d grid (and, therefore,
how to generate corresponding random numbers). Not intended for direct use
by application code (use class "BinnedDensityND" instead).

IdentityTransform1D.hh -- Identity coordinate transformation.

LocationScaleFamily1D.hh -- Creates a location-scale family from
a (typically non-scalable) 1-d distribution.

LocationScaleTransform1.hh -- Coordinate transformation of the type
y = (x - mu)/sigma, with mu and sigma depending on a single parameter and
calculated by two separate functors.

JohnsonCurves.hh -- Johnson frequency curves.

RatioOfNormals.hh -- Distribution generated when one Gaussian variable is
divided by another.

SbMomentsCalculator.hh -- Calculator of moments for S_b curves. Not for use
by application code.

ScalableGaussND.hh -- multivariate Gaussian with diagonal covariance
matrix.

TransformedDistribution1D.hh -- 1-d distributions in x but whose density,
cdf, etc are specified in the y (transformed) space.

TruncatedDistribution1D.hh -- 1-d distributions with truncated support.

LeftCensoredDistribution.hh -- left-censored distribution.

RightCensoredDistribution.hh -- right-censored distribution.

UGaussConvolution1D.hh -- convolution of uniform and Gaussian
distributions.

Fitting of parametric models
----------------------------

FitUtils.hh -- fitting of 1-d histograms. See also headers
minuitFitJohnsonCurves.hh and MinuitDensityFitFcn1D.hh in the "interfaces"
directory.
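Returning to the coordinate transforms listed in the previous section, here
is a sketch of how they combine with TransformedDistribution1D. The
parameter values are arbitrary; the equivalence between a JohnsonSu curve
and an asinh-transformed Gaussian is the one verified in the package's unit
tests:

    #include "npstat/stat/Distributions1D.hh"
    #include "npstat/stat/JohnsonCurves.hh"
    #include "npstat/stat/AsinhTransform1D.hh"
    #include "npstat/stat/TransformedDistribution1D.hh"

    using namespace npstat;

    JohnsonSu jsu(0.0, 1.0, 1.0, 10.0);   // mean, sigma, skewness, kurtosis
    AsinhTransform1D tr(jsu.getDelta(), jsu.getLambda(),
                        jsu.getGamma(), jsu.getXi());
    Gauss1D g(0.0, 1.0);
    TransformedDistribution1D d(tr, g);   // same density/cdf/quantile as jsu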
Statistical testing
-------------------

AbsBinnedComparison1D.hh -- interface for comparisons of binned
distributions

BinnedADTest1D.hh -- binned version of the Anderson-Darling test

BinnedKSTest1D.hh -- binned version of the Kolmogorov-Smirnov test

PearsonsChiSquared.hh -- chi-squared goodness-of-fit test

Local filtering
---------------

AbsFilter1DBuilder.hh -- interface classes for building local polynomial
filters in 1d. Implemented in Filter1DBuilders.hh, BetaFilter1DBuilder.hh,
WeightTableFilter1DBuilder.hh, BernsteinFilter1DBuilder.hh. Used by
LocalPolyFilter1D.hh.

AbsPolyFilter1D.hh -- interface class for building univariate smoothers
that can be optimized by cross-validation. Implemented in
ConstantBandwidthSmoother1D.hh, LocalPolyFilter1D.hh.

AbsPolyFilterND.hh -- interface class for building multivariate smoothers
that can be optimized by cross-validation. Implemented in
LocalPolyFilterND.hh, SequentialPolyFilterND.hh, and KDEFilterND.hh. Used
by AbsBandwidthCV.hh, BandwidthCVLeastSquaresND.hh,
BandwidthCVPseudoLogliND.hh.

AbsSymbetaFilterProvider.hh -- interface class for building local
polynomial filters in 1d using kernels from the symmetric beta family.

MemoizingSymbetaFilterProvider.hh -- implements AbsSymbetaFilterProvider
interface and allows for filter storage and lookup.

BernsteinFilter1DBuilder.hh -- concrete class for building filters which
smooth densities using Bernstein polynomials.

BetaFilter1DBuilder.hh -- concrete class for building filters which smooth
densities using beta functions (Bernstein polynomials of non-integer
degree).

betaKernelsBandwidth.hh -- optimal bandwidth for density estimation with
beta kernels.

BoundaryHandling.hh -- user API for handling LOrPE boundary methods.

BoundaryMethod.hh -- enums for handling LOrPE boundary methods.

continuousDegreeTaper.hh -- a method for generating tapers with effectively
continuous degree. Intended for use with LocalPolyFilter1D.

Filter1DBuilders.hh -- concrete classes for building local polynomial
filters in 1d. They differ by their treatment of the weight function and
boundary effects.

WeightTableFilter1DBuilder.hh -- concrete classes for building local
polynomial filters in 1d from density scans.

KDEFilterND.hh -- KDE filtering (Nadaraya-Watson regression) on a regularly
spaced multivariate grid.

LocalPolyFilter1D.hh -- local polynomial filtering (regression) on
a regularly spaced 1-d grid.

LocalPolyFilterND.hh -- local polynomial filtering (regression) on
a regularly spaced multivariate grid.

LocalMultiFilter1D.hh -- local polynomial filtering with separate
polynomials from an orthogonal set.

LocalQuadraticLeastSquaresND.hh -- local quadratic polynomial filtering for
an irregular set of points (possibly including uncertainties).

lorpeSmooth1D.hh -- high level driver for LocalPolyFilter1D, etc. Intended
for density reconstruction from histograms.

lorpeBackground1D.hh -- high level driver for fitting mixed models in which
signal is parameterized and background is nonparametric.

lorpeBackgroundCVDensity1D.hh -- linearization of cross-validation
calculations for fitting mixed models with nonparametric background.

QuadraticOrthoPolyND.hh -- local quadratic polynomial filtering on a grid.
In comparison with LocalPolyFilterND.hh, supports a finer interface to
filtering functionality (direct support of an AbsDistributionND as
a weight, calculations of gradient and hessian for the fitted surface,
fitting is performed on functors rather than ArrayND objects, etc). Used by
LocalLogisticRegression.hh.
SequentialPolyFilterND.hh -- similar to LocalPolyFilterND.hh, but the
filtering is performed sequentially for each dimension using 1-d filters.

SymbetaPolyCollection1D.hh -- class that builds LocalPolyFilter1D objects
and memoizes local polynomials for the bandwidth values used.

Nonparametric density estimation
--------------------------------

AbsBandwidthCV.hh -- interface classes for calculating 1-d and multivariate
cross-validation criteria for bandwidth and taper selection. Interfaces
declared in this file are implemented in BandwidthCVLeastSquares1D.hh,
BandwidthCVLeastSquaresND.hh, BandwidthCVPseudoLogli1D.hh, and
BandwidthCVPseudoLogliND.hh. These interfaces are used by classes in
CVCopulaSmoother.hh.

AbsBandwidthGCV.hh -- interface classes for calculating 1-d and
multivariate cross-validation criteria for bandwidth and taper selection.
Interfaces declared in this file are implemented in
BandwidthGCVLeastSquares1D.hh, BandwidthGCVLeastSquaresND.hh,
BandwidthGCVPseudoLogli1D.hh, and BandwidthGCVPseudoLogliND.hh. These
interfaces are used by classes in GCVCopulaSmoother.hh. The difference with
the series of classes defined in "AbsBandwidthCV.hh" is that the grouping
(i.e., the binning) of data is explicitly acknowledged, so that
a substantially different set of filters (removing the whole group) can be
used for cross-validation.

AbsCompositeDistroBuilder.hh -- interface class for building composite
distributions (which consist of copula and marginals) out of data samples.
Implemented in DummyCompositeDistroBuilder.hh and
NonparametricCompositeBuilder.hh.

AbsDistro1DBuilder.hh -- interface class for building 1-d distributions out
of data samples. Implemented in DummyDistro1DBuilder.hh and
NonparametricDistro1DBuilder.hh.

AbsCopulaSmootherBase.hh -- interface class for building copulas out of
data samples. Implemented in AbsCVCopulaSmoother.hh. Used by
NonparametricCompositeBuilder.hh.

AbsKDE1DKernel.hh -- interface class for simple, brute-force KDE in 1-d
without discretization or boundary correction. Implemented in
KDE1DHOSymbetaKernel.hh.

AbsMarginalSmootherBase.hh -- interface class for building 1-d smoothers of
histograms. Implemented in JohnsonKDESmoother.hh, LOrPEMarginalSmoother.hh,
ConstantBandwidthSmoother1D.hh, and VariableBandwidthSmoother1D.hh. Used by
NonparametricCompositeBuilder.hh.

AbsResponseIntervalBuilder.hh -- interface class for making cuts in the
univariate response space when density estimation is performed in the
regression context. Implemented in DummyResponseIntervalBuilder.hh and
RatioResponseIntervalBuilder.hh.

AbsResponseBoxBuilder.hh -- interface class for making cuts in the
multivariate response space when density estimation is performed in the
regression context. Implemented in DummyResponseBoxBuilder.hh and
RatioResponseBoxBuilder.hh.

amiseOptimalBandwidth.hh -- function for selecting optimal LOrPE bandwidth
values by optimizing AMISE on a reference distribution. Used in
JohnsonKDESmoother.cc and ConstantBandwidthSmoother1D.cc. Can also be used
by application code.

BandwidthCVLeastSquares1D.hh -- class for calculating KDE or LOrPE
cross-validation MISE approximations for 1-d density estimates.

BandwidthCVLeastSquaresND.hh -- class for calculating KDE or LOrPE
cross-validation MISE approximations for multivariate density estimates.

BandwidthCVPseudoLogli1D.hh -- class for calculating KDE or LOrPE
cross-validation pseudo log likelihood, for 1-d density estimates.
BandwidthCVPseudoLogliND.hh -- Class for calculating KDE or LOrPE
cross-validation pseudo log likelihood, for multivariate density estimates.

buildInterpolatedCompositeDistroND.hh -- Multivariate density estimation in
the regression context, with interpolation.

buildInterpolatedDistro1DNP.hh -- Univariate density estimation in the
regression context, with interpolation.

ConstantBandwidthSmoother1D.hh -- 1-d KDE implementation with constant
bandwidth. Implements AbsMarginalSmoother interface.

ConstantBandwidthSmootherND.hh -- multivariate KDE implementation with
constant bandwidth.

CVCopulaSmoother.hh -- an interface to copula smoothers which use constant
bandwidth LOrPE and select bandwidth by cross-validation. Implemented in
LOrPECopulaSmoother.hh, SequentialCopulaSmoother.hh, KDECopulaSmoother.hh,
and BernsteinCopulaSmoother.hh. Could be used by application code if it
needs to develop its own cross-validation method for nonparametric copula
estimation.

GCVCopulaSmoother.hh -- an interface to copula smoothers working with
grouped data and using substantially different filters for
cross-validation. Implemented in KDEGroupedCopulaSmoother.hh,
LOrPEGroupedCopulaSmoother.hh, and SequentialGroupedCopulaSmoother.hh.

empiricalCopula.hh -- functions for building empirical copulas by
constructing kd-tree for the data points and then doing lookups in this
tree.

empiricalCopulaHisto.hh -- function for building empirical copula densities
by ordering the data points in multiple dimensions.

weightedCopulaHisto.hh -- function for building empirical copula densities
by ordering weighted data points in multiple dimensions.

HistoNDCdf.hh -- multivariate cumulative distribution function built from
a histogram. Its "coveringBox" method can be used to make k-NN type density
estimates (and for other purposes).

JohnsonKDESmoother.hh -- 1-d KDE implementation with adaptive bandwidth
(see comments in the header file for details). Implements
AbsMarginalSmoother interface. See also "fitCompositeJohnson.hh" header in
the "interfaces" directory for an alternative approach.

KDE1D.hh -- Convenience class which aggregates the kernel and the data for
brute-force 1-d KDE without boundary correction.

KDE1DCV.hh -- Cross-validation utilities for brute-force KDE in 1-d.

KDE1DHOSymbetaKernel.hh -- high order Gaussian or symmetric beta kernels
for brute-force KDE in 1-d.

KDECopulaSmoother.hh -- constant bandwidth multivariate KDE copula smoother
in which the bandwidth is selected by cross-validation. Implements
CVCopulaSmoother.

LOrPECopulaSmoother.hh -- constant bandwidth multivariate LOrPE copula
smoother in which the bandwidth is selected by cross-validation. Implements
CVCopulaSmoother.

LOrPEMarginalSmoother.hh -- 1-d LOrPE for fitting margins of composite
distributions. Basically, interfaces "lorpeSmooth1D" to
AbsMarginalSmoother.

lorpeMise1D.hh -- Deterministic MISE calculation for reconstructing an
arbitrary 1-d density.

NonparametricCompositeBuilder.hh -- an implementation of
AbsCompositeDistroBuilder. Uses separate density estimation procedures for
copula and marginals.

orthoPoly1DVProducts.hh -- utility functions for calculating certain
statistical properties of 1-d orthogonal polynomials.

OSDE1D.hh -- orthogonal series density estimation in one dimension.

PolyFilterCollection1D.hh -- collection of LocalPolyFilter1D objects with
the same kernel but different bandwidth values. Intended for use with
bandwidth scans (for example, in cross-validation scenarios).
QuantileTable1D.hh -- density function defined by its quantile table. Can
be easily constructed using "empiricalQuantile" function from StatUtils.hh.

SequentialCopulaSmoother.hh -- similar to LOrPECopulaSmoother, but the
filters are limited to tensor products of univariate filters.

variableBandwidthSmooth1D.hh -- KDE with adaptive bandwidth. Used by
JohnsonKDESmoother.hh.

Deconvolution density estimation (Unfolding)
--------------------------------------------

AbsUnfold1D.hh -- interface class for deconvolution density estimation in
1-d (a.k.a. unfolding).

AbsUnfoldND.hh -- interface class for multivariate deconvolution density
estimation (a.k.a. unfolding).

AbsUnfoldingFilterND.hh -- interface class for smoothers used in
multivariate unfolding.

gaussianResponseMatrix.hh -- helper function for building response matrices
for one-dimensional unfolding problems.

MultiscaleEMUnfold1D.hh -- a variation of 1-d unfolding algorithm with
multiscale filtering and, potentially, faster convergence.

productResponseMatrix.hh -- helper function for building sparse response
matrices for multivariate unfolding problems.

ResponseMatrix.hh -- sparse response matrix representation for multivariate
unfolding.

SmoothedEMUnfold1D.hh -- expectation-maximization (a.k.a. D'Agostini)
unfolding with smoothing for 1-d distributions.

SmoothedEMUnfoldND.hh -- expectation-maximization unfolding with smoothing
for multivariate distributions.

UnfoldingBandwidthScanner1D.hh -- class which gets various information from
1-d unfolding results in a convenient form.

UnfoldingBandwidthScannerND.hh -- class which gets various information from
multivariate unfolding results in a convenient form.

Nonparametric regression
------------------------

LocalLogisticRegression.hh -- facilities for performing local linear and
quadratic logistic regression. The interface is designed for use together
with Minuit. See also the header "minuitLocalRegression.hh" in the
"interfaces" directory.

QuantileRegression1D.hh -- nonparametric quantile regression with one
predictor. Supports polynomials of arbitrary degrees. Useful for
constructing Neyman belts. See also "minuitLocalQuantileRegression1D.hh"
header in the "interfaces" directory.

LocalQuantileRegression.hh -- multivariate local linear or quadratic
quantile regression. Can be used to fit histograms or collections of
points. See also "minuitQuantileRegression.hh" header in the "interfaces"
directory.

CensoredQuantileRegression.hh -- multivariate local linear or quadratic
quantile regression which can be used for data samples affected by
a one-sided cut.

griddedRobustRegression.hh -- framework for local robust regression (in
particular, for local least trimmed squares).

GriddedRobustRegressionStop.hh -- simple functor for stopping robust
regression sequence.

AbsLossCalculator.hh -- abstract class for calculating local loss for local
robust regression. Implemented in "WeightedLTSLoss.hh" and
"TwoPointsLTSLoss.hh".

WeightedLTSLoss.hh -- functor for calculating local least trimmed squares
with one point removed.

TwoPointsLTSLoss.hh -- functor for calculating local least trimmed squares
with two points or 1-d stripes removed.

Interpolation of statistical distributions
------------------------------------------

AbsGridInterpolatedDistribution.hh -- interface class for interpolating
between probability distributions placed at the points of a rectangular
parameter grid. Implemented in GridInterpolatedDistribution.hh. To be used
by application code.
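As a concrete example of what this section provides, here is a sketch of
InterpolatedDistro1D1P (described below) interpolating between two
Gaussians placed at the parameter values 1 and 2, following the usage in
the package's unit tests:

    #include "npstat/stat/Distributions1D.hh"
    #include "npstat/stat/InterpolatedDistro1D1P.hh"

    using namespace npstat;

    const double gridPoints[2] = {1.0, 2.0};
    GridAxis ax(std::vector<double>(gridPoints, gridPoints+2));
    std::vector<CPP11_shared_ptr<AbsDistribution1D> > distros;
    distros.push_back(CPP11_shared_ptr<AbsDistribution1D>(new Gauss1D(1.0, 2.0)));
    distros.push_back(CPP11_shared_ptr<AbsDistribution1D>(new Gauss1D(-1.0, 1.0)));

    InterpolatedDistro1D1P idist(ax, distros, 1.3);  // start at parameter 1.3
    idist.setParameter(1.5);           // move halfway between the grid points
    const double med = idist.quantile(0.5);  // median of the interpolated density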
AbsInterpolatedDistribution1D.hh -- interface class for univariate density
interpolation algorithms. Implemented by InterpolatedDistribution1D.hh and
VerticallyInterpolatedDistribution1D.hh.

AbsInterpolationAlgoND.hh -- interface class for multivariate density
interpolation algorithms. Implemented by CopulaInterpolationND.hh and
UnitMapInterpolationND.hh. Used by GridInterpolatedDistribution.hh.

CopulaInterpolationND.hh -- interpolation of distributions represented by
CompositeDistributionND. Copulas and quantile functions of the marginals
are combined with externally provided weights.

UnitMapInterpolationND.hh -- interpolation of distributions mapped to the
unit cube by conditional quantile functions. This class interpolates
between multivariate distributions by interpolating between their unit
maps.

GridInterpolatedDistribution.hh -- class which represents a complete
multivariate distribution interpolated in parameters. Constructed
incrementally, by adding distributions to the grid points.

InterpolatedDistribution1D.hh -- 1-d continuous statistical distribution
which interpolates between other distributions by performing linear
interpolation of the quantile function.

InterpolatedDistro1D1P.hh -- 1-d continuous statistical distribution
interpolation on a 1-d parameter grid, with linear interpolation of weights
between parameter values. Supports both interpolation of quantile functions
and vertical interpolation.

InterpolatedDistro1DNP.hh -- 1-d continuous statistical distribution
interpolation on a multivariate parameter grid, with multilinear
interpolation of weights between parameter values. Supports both
interpolation of quantile functions and vertical interpolation.

Miscellaneous data analysis techniques
--------------------------------------

neymanPearsonWindow1D.hh -- search for likelihood ratio maximum and
determination of optimal cuts in 1-d based on likelihood ratio between
signal and background.

Convenience API
---------------

DensityScan1D.hh -- utility class for discretizing 1-d densities.

DensityScanND.hh -- functor for filling multidimensional arrays with
multivariate density scans. Calculates the density in the bin center.

discretizationErrorND.hh -- function for calculating the ISE due to
discretization of multivariate densities.

DensityAveScanND.hh -- functor for filling multidimensional arrays with
multivariate density scans. Integrates the density over the bin area.

Distribution1DFactory.hh -- creation of a number of 1-d distributions from
a uniform interface.

scanDensityAsWeight.hh -- determines density support and scans
a multivariate density in a manner suitable for subsequent construction of
orthogonal polynomial systems.

Utilities
---------

buildInterpolatedHelpers.hh -- utilities for nonparametric interpolation of
statistical distributions. Not for use by application code.

histoUtils.hh -- utilities related to special ways of filling histograms,
etc.

mirrorWeight.hh -- helper function for scanning multivariate densities.
Used by LocalPolyFilterND and KDEFilterND codes.

multinomialCovariance1D.hh -- helper function for building multinomial
distribution covariance matrices.

NMCombinationSequencer.hh -- helper class for a certain type of integer
permutations (distinct ways of choosing M out of N objects).

StatUtils.hh -- a few useful functions which did not fit naturally anywhere
else.

SymbetaParams1D.hh -- collects the parameters of symmetric beta kernels.
volumeDensityFromBinnedRadial.hh -- convert a density which was obtained
from a histogram of radius values into the density per unit area (or per
unit volume or hypervolume).

WeightedDistro1DPtr.hh -- associates a pointer to AbsDistribution1D with
a weight. Not for use by application code.

I/O
---

Distribution1DReader.hh -- factory for deserializing 1-d distribution
functions.

DistributionNDReader.hh -- factory for deserializing N-d distribution
functions.

distributionReadError.hh -- this code throws an appropriate exception if
input I/O operations fail for a distribution previously stored on disk.

DiscreteDistribution1DReader.hh -- factory for deserializing 1-d discrete
distributions.

DistributionTransform1DReader.hh -- factory for deserializing 1-d
transforms.

fillHistoFromText.hh -- utility for filling histograms from text files
(a similar utility for ntuples is declared in the AbsNtuple.hh header).

LocalPolyFilter1DReader.hh -- a reader factory for classes derived from
LocalPolyFilter1D.

NtupleRecordTypes.hh    -- mechanisms for locating parts of the
NtupleRecordTypesFwd.hh    ArchivedNtuple in the archive. Not for use by
                           application code.

NtupleReference.hh -- special reference type for ArchivedNtuple.

StorableMultivariateFunctorReader.hh -- factory for deserializing storable
multivariate functors.

UnfoldingFilterNDReader.hh -- reader factory for classes derived from
AbsUnfoldingFilterND.
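All of these reader factories are exercised through simple write/read round
trips. A sketch of that pattern for a 1-d distribution (stream error
checking omitted; the package's unit tests show the checked version):

    #include <sstream>
    #include "npstat/stat/Distributions1D.hh"
    #include "npstat/stat/Distribution1DReader.hh"

    using namespace npstat;

    Gauss1D g(0.0, 1.0);
    std::ostringstream os;
    g.classId().write(os);    // serialize the class identifier
    g.write(os);              // serialize the object itself

    std::istringstream is(os.str());
    gs::ClassId id(is, 1);    // read the identifier back
    AbsDistribution1D* copy = AbsDistribution1D::read(id, is);
    // ... use *copy, then dispose of it
    delete copy;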