Index: trunk/npstat/stat/UnbinnedGOFTests1D.icc =================================================================== --- trunk/npstat/stat/UnbinnedGOFTests1D.icc (revision 742) +++ trunk/npstat/stat/UnbinnedGOFTests1D.icc (revision 743) @@ -1,82 +1,157 @@ #include #include #include #include #include #include #include #define npstat_UnbinnedGOFTests1D_get_data_sorted /**/ \ assert(data); \ const Numeric* sorted = data; \ std::unique_ptr > sortedPtr; \ if (!dataIsSorted) \ { \ sortedPtr = std::unique_ptr >( \ new std::vector(data, data + lenData)); \ std::sort(sortedPtr->begin(), sortedPtr->end()); \ sorted = &(*sortedPtr)[0]; \ } namespace npstat { template double KSTest1D::testStat(const Numeric* data, const unsigned long lenData, const bool dataIsSorted) const { if (!lenData) throw std::invalid_argument( "In npstat::KSTest1D::testStat: no data provided"); npstat_UnbinnedGOFTests1D_get_data_sorted; double maxDist = -1.0; const double n = static_cast(lenData); for (unsigned long i = 0; icdf(sorted[i]); maxDist = std::max(maxDist, std::abs(i/n - cdf)); maxDist = std::max(maxDist, std::abs((i + 1UL)/n - cdf)); } return maxDist; } template double ADTest1D::testStat(const Numeric* data, const unsigned long lenData, const bool dataIsSorted) const { if (!lenData) throw std::invalid_argument( "In npstat::ADTest1D::testStat: no data provided"); npstat_UnbinnedGOFTests1D_get_data_sorted; long double sum = 0.0L; for (unsigned long i = 0; icdf(sorted[i]); const long double cdf2 = distro_->cdf(sorted[lenData - 1UL - i]); if (cdf1 == 0.0L || cdf1 == 1.0L || cdf2 == 0.0L || cdf2 == 1.0L) return DBL_MAX; sum += (2UL*i + 1UL)*logl(cdf1*(1.0L - cdf2)); } const long double len = static_cast(lenData); return -sum/len - len; } template double CvMTest1D::testStat(const Numeric* data, const unsigned long lenData, const bool dataIsSorted) const { if (!lenData) throw std::invalid_argument( "In npstat::CvMTest1D::testStat: no data provided"); npstat_UnbinnedGOFTests1D_get_data_sorted; const long double twon = 2.0L*static_cast(lenData); long double sum = 0.0L; for (unsigned long i = 0; icdf(sorted[i]); sum += del*del; } return 1.0L/6.0L/twon + sum; } + + template + double ZhangZKTest1D::testStat( + const Numeric* data, const unsigned long lenData, + const bool dataIsSorted) const + { + if (!lenData) throw std::invalid_argument( + "In npstat::ZhangZKTest1D::testStat: no data provided"); + + npstat_UnbinnedGOFTests1D_get_data_sorted; + + double maxval = -DBL_MAX; + for (unsigned long i = 1; i<=lenData; ++i) + { + const double im12 = i - 0.5; + const double nmi12 = lenData - i + 0.5; + const double cdf = distro_->cdf(sorted[i-1]); + const double exc = distro_->exceedance(sorted[i-1]); + if (cdf <= 0.0 || exc <= 0.0) + return DBL_MAX; + const double value = im12*std::log(im12/(lenData*cdf)) + + nmi12*std::log(nmi12/(lenData*exc)); + if (value > maxval) + maxval = value; + } + return maxval; + } + + template + double ZhangZATest1D::testStat( + const Numeric* data, const unsigned long lenData, + const bool dataIsSorted) const + { + if (!lenData) throw std::invalid_argument( + "In npstat::ZhangZATest1D::testStat: no data provided"); + + npstat_UnbinnedGOFTests1D_get_data_sorted; + + long double sum = 0.0L; + for (unsigned long i = 1; i<=lenData; ++i) + { + const double im12 = i - 0.5; + const double nmi12 = lenData - i + 0.5; + const double cdf = distro_->cdf(sorted[i-1]); + const double exc = distro_->exceedance(sorted[i-1]); + if (cdf <= 0.0 || exc <= 0.0) + return DBL_MAX; + const double value = std::log(cdf)/nmi12 + std::log(exc)/im12; + sum += value; + } + return -sum; + } + + template + double ZhangZCTest1D::testStat( + const Numeric* data, const unsigned long lenData, + const bool dataIsSorted) const + { + if (!lenData) throw std::invalid_argument( + "In npstat::ZhangZCTest1D::testStat: no data provided"); + + npstat_UnbinnedGOFTests1D_get_data_sorted; + + long double sum = 0.0L; + for (unsigned long i = 1; i<=lenData; ++i) + { + const double cdf = distro_->cdf(sorted[i-1]); + if (cdf <= 0.0) + return DBL_MAX; + const double denom = (lenData - 0.5)/(i - 0.75) - 1.0; + const double value = log((1.0/cdf - 1.0)/denom); + sum += value*value; + } + return sum; + } } Index: trunk/npstat/stat/UnbinnedGOFTests1D.hh =================================================================== --- trunk/npstat/stat/UnbinnedGOFTests1D.hh (revision 742) +++ trunk/npstat/stat/UnbinnedGOFTests1D.hh (revision 743) @@ -1,97 +1,177 @@ #ifndef NPSTAT_UNBINNEDGOFTESTS1D_HH_ #define NPSTAT_UNBINNEDGOFTESTS1D_HH_ #include "npstat/stat/AbsUnbinnedGOFTest1D.hh" namespace npstat { /** Kolmogorov-Smirnov test */ class KSTest1D : public AbsUnbinnedGOFTest1D { public: inline explicit KSTest1D(const AbsDistribution1D& d) : AbsUnbinnedGOFTest1D(d) {} inline virtual ~KSTest1D() {} inline virtual std::string shortName() const {return "KS";} inline virtual double testStatistic( const double* data, const unsigned long sz, const bool b) const {return testStat(data, sz, b);} inline virtual double testStatistic( const float* data, const unsigned long sz, const bool b) const {return testStat(data, sz, b);} inline virtual bool hasAnalyticPValue() const {return true;} virtual double analyticPValue(double stat, unsigned long sz) const; private: template double testStat(const Numeric* data, unsigned long lenData, bool dataIsSorted) const; }; /** Anderson-Darling test */ class ADTest1D : public AbsUnbinnedGOFTest1D { public: inline explicit ADTest1D(const AbsDistribution1D& d) : AbsUnbinnedGOFTest1D(d) {} inline virtual ~ADTest1D() {} inline virtual std::string shortName() const {return "AD";} inline virtual double testStatistic( const double* data, const unsigned long sz, const bool b) const {return testStat(data, sz, b);} inline virtual double testStatistic( const float* data, const unsigned long sz, const bool b) const {return testStat(data, sz, b);} inline virtual bool hasAnalyticPValue() const {return true;} virtual double analyticPValue(double stat, unsigned long sz) const; private: template double testStat(const Numeric* data, unsigned long lenData, bool dataIsSorted) const; }; /** Cramer-von Mises test */ class CvMTest1D : public AbsUnbinnedGOFTest1D { public: inline explicit CvMTest1D(const AbsDistribution1D& d) : AbsUnbinnedGOFTest1D(d) {} inline virtual ~CvMTest1D() {} inline virtual std::string shortName() const {return "CvM";} inline virtual double testStatistic( const double* data, const unsigned long sz, const bool b) const {return testStat(data, sz, b);} inline virtual double testStatistic( const float* data, const unsigned long sz, const bool b) const {return testStat(data, sz, b);} inline virtual bool hasAnalyticPValue() const {return true;} virtual double analyticPValue(double stat, unsigned long sz) const; private: template double testStat(const Numeric* data, unsigned long lenData, bool dataIsSorted) const; }; + + /** + // Jin Zhang's ZK test. For this and other Jin Zhang's tests, see + // Jin Zhang, "Powerful Goodness-of-Fit Tests Based on the Likelihood + // Ratio", Journal of the Royal Statistical Society. Series B + // (Statistical Methodology), Vol. 64, No. 2 (2002), pp. 281-294. + */ + class ZhangZKTest1D : public AbsUnbinnedGOFTest1D + { + public: + inline explicit ZhangZKTest1D(const AbsDistribution1D& d) + : AbsUnbinnedGOFTest1D(d) {} + + inline virtual ~ZhangZKTest1D() {} + + inline virtual std::string shortName() const {return "ZK";} + + inline virtual double testStatistic( + const double* data, const unsigned long sz, const bool b) const + {return testStat(data, sz, b);} + + inline virtual double testStatistic( + const float* data, const unsigned long sz, const bool b) const + {return testStat(data, sz, b);} + + private: + template + double testStat(const Numeric* data, unsigned long lenData, + bool dataIsSorted) const; + }; + + /** Jin Zhang's ZA test */ + class ZhangZATest1D : public AbsUnbinnedGOFTest1D + { + public: + inline explicit ZhangZATest1D(const AbsDistribution1D& d) + : AbsUnbinnedGOFTest1D(d) {} + + inline virtual ~ZhangZATest1D() {} + + inline virtual std::string shortName() const {return "ZA";} + + inline virtual double testStatistic( + const double* data, const unsigned long sz, const bool b) const + {return testStat(data, sz, b);} + + inline virtual double testStatistic( + const float* data, const unsigned long sz, const bool b) const + {return testStat(data, sz, b);} + + private: + template + double testStat(const Numeric* data, unsigned long lenData, + bool dataIsSorted) const; + }; + + /** Jin Zhang's ZC test */ + class ZhangZCTest1D : public AbsUnbinnedGOFTest1D + { + public: + inline explicit ZhangZCTest1D(const AbsDistribution1D& d) + : AbsUnbinnedGOFTest1D(d) {} + + inline virtual ~ZhangZCTest1D() {} + + inline virtual std::string shortName() const {return "ZC";} + + inline virtual double testStatistic( + const double* data, const unsigned long sz, const bool b) const + {return testStat(data, sz, b);} + + inline virtual double testStatistic( + const float* data, const unsigned long sz, const bool b) const + {return testStat(data, sz, b);} + + private: + template + double testStat(const Numeric* data, unsigned long lenData, + bool dataIsSorted) const; + }; } #include "npstat/stat/UnbinnedGOFTests1D.icc" #endif // NPSTAT_UNBINNEDGOFTESTS1D_HH_