diff --git a/include/HEJ/detail/Unweighter_impl.hh b/include/HEJ/detail/Unweighter_impl.hh index dce85da..feeb97c 100644 --- a/include/HEJ/detail/Unweighter_impl.hh +++ b/include/HEJ/detail/Unweighter_impl.hh @@ -1,137 +1,147 @@ /** \file * \brief Unweighter Class Implementation for template functions * * \authors The HEJ collaboration (see AUTHORS for details) * \date 2019 * \copyright GPLv2 or later */ #pragma once #include "HEJ/Unweighter.hh" #include <limits> #include <utility> #include <cstdint> #include "HEJ/Event.hh" #include "HEJ/RNG.hh" namespace HEJ { namespace detail { // primitive histogram class // we don't want to do much with this, // so try to avoid an external dependence like YODA or ROOT class Histogram { public: Histogram(size_t nbins, double min, double max): bins_(nbins), min_{min}, width_{(max-min)/nbins} {} - void fill(double pos, double val) { + size_t bin_idx(double pos) { // needs intermediate cast since negative double -> unsigned // is *undefined behaviour* - const size_t bin = static_cast<int>((pos - min_)/width_); + return static_cast<int>((pos - min_)/width_); + } + + void fill(double pos, double val) { + size_t const bin = bin_idx(pos); if(bin >= bins_.size()) return; bins_[bin] += val; } double operator[](size_t i) const { return bins_[i]; } double & operator[](size_t i) { return bins_[i]; } std::pair<double, double> peak_bin() const { const auto max = std::max_element(begin(bins_), end(bins_)); if(max == end(bins_)) { static constexpr double nan = std::numeric_limits<double>::quiet_NaN(); return std::make_pair(nan, nan); } const size_t nbin = std::distance(begin(bins_), max); return std::make_pair(min_ + (nbin + 0.5)*width_, *max); } private: std::vector<double> bins_; double min_, width_; }; template<class ConstIt> std::pair<double, double> awt_range(ConstIt begin, ConstIt end) { // can't use std::minmax_element // because boost filter iterators are not assignable assert(begin != end); double min = std::numeric_limits<double>::infinity(); double max = 0.; for(auto it = begin; it != end; ++it) { const double awt = std::abs(it->central().weight); if(awt == 0) continue; if(awt < min) { min = awt; } else if (awt > max) { max = awt; } } return std::make_pair(min, max); } } template<class ConstIt> void Unweighter::set_cut_to_peakwt(ConstIt begin, ConstIt end, double max_dev){ if(begin == end) { throw std::invalid_argument{"Empty range of events"}; } double err = 0.; double awt_sum = 0.; const auto extremal_awts = detail::awt_range(begin, end); - const double min_lwt = std::log(extremal_awts.first); - const double max_lwt = std::log(extremal_awts.second); + // Set range minimaly higher to include min/max inside range + const double min_lwt = 0.99999*std::log(extremal_awts.first); + const double max_lwt = 1.00001*std::log(extremal_awts.second); const size_t nevents = std::distance(begin, end); // heuristic value for number of bins const size_t nbins = std::sqrt(nevents); detail::Histogram wtwt{nbins, min_lwt, max_lwt}; detail::Histogram nwt{nbins, min_lwt, max_lwt}; - for(; begin != end; ++begin){ - auto const & ev = *begin; - const double awt = std::abs(ev.central().weight); - wtwt.fill(std::log(awt), awt); + for(auto it=begin; it != end; ++it){ + const double awt = std::abs(it->central().weight); nwt.fill(std::log(awt), 1.); - const double tmp = awt*std::log(awt); - err += tmp*tmp; - awt_sum += awt; } - // prune low statistics bins with a heuristic cut const auto cut = nevents/nbins; - for(size_t i = 0; i < nbins; ++i) { - if(nwt[i] < cut) wtwt[i] = 0; + for(; begin != end; ++begin){ + const double awt = std::abs(begin->central().weight); + const double log_wt = std::log(awt); + const double bin_idx = nwt.bin_idx(log_wt); + assert(bin_idx<nbins); + // prune low statistics bins with a heuristic cut + if(nwt[bin_idx] >= cut){ + wtwt.fill(log_wt, awt); + const double tmp = awt*log_wt; + err += tmp*tmp; + awt_sum += awt; + } } + const auto peak = wtwt.peak_bin(); err = std::sqrt(err)/awt_sum; set_cut(std::exp(peak.first + max_dev*err)); } template<class ConstIt> void Unweighter::set_cut_to_maxwt(ConstIt begin, ConstIt end){ set_cut(-1); for(; begin != end; ++begin){ const double awt = std::abs(begin->central().weight); if( awt > get_cut()) set_cut(awt); } } template<class Iterator> Iterator Unweighter::unweight( Iterator begin, Iterator end, RNG & ran ) const { if(get_cut() < 0) return end; return std::remove_if(begin, end, [&](auto & ev) -> bool { return this->discard(ran, ev); }); } } // namespace HEJ diff --git a/t/test_unweighter.cc b/t/test_unweighter.cc index 0efaa80..6c880e2 100644 --- a/t/test_unweighter.cc +++ b/t/test_unweighter.cc @@ -1,147 +1,152 @@ /** * \authors The HEJ collaboration (see AUTHORS for details) * \date 2019 * \copyright GPLv2 or later */ #include "HEJ/CrossSectionAccumulator.hh" #include "HEJ/Event.hh" #include "HEJ/EventReader.hh" #include "HEJ/Mixmax.hh" #include "HEJ/Unweighter.hh" #include "hej_test.hh" namespace{ const fastjet::JetDefinition jet_def{fastjet::kt_algorithm, 0.4}; const double min_pt{30.}; const double sample_ratio{10.}; const double max_dev{2.}; bool compare_xs( HEJ::XSWithError<double> const & xs1, HEJ::XSWithError<double> const & xs2 ){ std::cout << xs1.value << "+/-" << xs1.error << " vs. " << xs2.value << "+/-" << xs2.error << std::endl; return std::abs(xs1.value/xs2.value-1.)<xs1.error; } } int main(int argc, char** argv) { if(argc != 2) { std::cerr << "Usage: " << argv[0] << " event_file\n"; return EXIT_FAILURE; } std::string file{argv[1]}; auto reader = HEJ::make_reader(file); // number of events auto nevents{reader->number_events()}; if(!nevents) { auto t_reader = HEJ::make_reader(file); nevents = 0; while(t_reader->read_event()) ++(*nevents); } ASSERT(*nevents>sample_ratio); const size_t size_sample = floor(*nevents/sample_ratio); HEJ::Mixmax ran{}; // no unweighting HEJ::CrossSectionAccumulator xs_base; std::vector<HEJ::Event> all_evts; // full unweighting HEJ::CrossSectionAccumulator xs_max; HEJ::Unweighter unw_max; size_t n_max{0}; // midpoint on full sample HEJ::CrossSectionAccumulator xs_mid; HEJ::Unweighter unw_mid; size_t n_mid{0}; // calc max from partial sample HEJ::CrossSectionAccumulator xs_pmax; HEJ::Unweighter unw_pmax; size_t n_pmax{0}; // midpoint on partial sample HEJ::CrossSectionAccumulator xs_pmid; HEJ::Unweighter unw_pmid; size_t n_pmid{0}; // initialise sample for(size_t n = 0; n < size_sample; ++n){ if(!reader->read_event()){ std::cerr << "Sample size bigger than event sample\n"; return EXIT_FAILURE; } const HEJ::Event event{ HEJ::Event::EventData{reader->hepeup()}.cluster(jet_def, min_pt) }; xs_base.fill(event); all_evts.push_back(event); } // calculate partial settings unw_pmax.set_cut_to_maxwt(all_evts); unw_pmid.set_cut_to_peakwt(all_evts, max_dev); for(auto const & ev: unw_pmax.unweight(all_evts, ran)){ xs_pmax.fill(ev); ++n_pmax; } for(auto const & ev: unw_pmid.unweight(all_evts, ran)){ xs_pmid.fill(ev); ++n_pmid; } while(reader->read_event()){ const HEJ::Event event{ HEJ::Event::EventData{reader->hepeup()}.cluster(jet_def, min_pt) }; xs_base.fill(event); auto ev{ unw_pmid.unweight(event, ran) }; if(ev){ xs_pmid.fill(*ev); ++n_pmid; } ev = unw_pmax.unweight(event, ran); if(ev){ xs_pmax.fill(*ev); ++n_pmax; } all_evts.push_back(event); } unw_max.set_cut_to_maxwt(all_evts); unw_mid.set_cut_to_peakwt(all_evts, max_dev); for(auto const & ev: unw_max.unweight(all_evts, ran)){ // make sure all the events have the same weight ASSERT( std::abs( std::abs(unw_max.get_cut()/ev.central().weight)-1. ) < 10e-16); xs_max.fill(ev); ++n_max; } for(auto const & ev: unw_mid.unweight(all_evts, ran)){ xs_mid.fill(ev); ++n_mid; } // sanity check number of events + ASSERT( all_evts.size() > 0); + ASSERT( n_pmax > 0); + ASSERT( n_max > 0); + ASSERT( n_pmid > 0); + ASSERT( n_mid > 0); ASSERT( n_pmax < all_evts.size() ); ASSERT( n_max < n_pmax ); ASSERT( n_pmid < all_evts.size() ); ASSERT( n_mid < all_evts.size() ); ASSERT( n_max < n_mid ); std::cout << "all_evts.size() " << all_evts.size() << " n_pmax " << n_pmax << " n_max " << n_max << " n_pmid " << n_pmid << " n_mid " << n_mid << std::endl; // control xs (in circle) ASSERT(compare_xs( xs_base.total(), xs_pmax.total() )); ASSERT(compare_xs( xs_pmax.total(), xs_max.total() )); ASSERT(compare_xs( xs_max.total() , xs_pmid.total() )); ASSERT(compare_xs( xs_pmid.total(), xs_mid.total() )); ASSERT(compare_xs( xs_mid.total() , xs_base.total() )); return EXIT_SUCCESS; }