diff --git a/include/HEJ/EventReader.hh b/include/HEJ/EventReader.hh index 5a81e3f..10cd277 100644 --- a/include/HEJ/EventReader.hh +++ b/include/HEJ/EventReader.hh @@ -1,44 +1,57 @@ /** \file * \brief Header file for event reader interface * * This header defines an abstract base class for reading events from files. * * \authors The HEJ collaboration (see AUTHORS for details) * \date 2019 * \copyright GPLv2 or later */ #pragma once #include <memory> #include <string> #include "LHEF/LHEF.h" +#include "HEJ/optional.hh" + namespace HEJ{ class EventData; // Abstract base class for reading events from files struct EventReader { //! Read an event virtual bool read_event() = 0; //! Access header text virtual std::string const & header() const = 0; //! Access run information virtual LHEF::HEPRUP const & heprup() const = 0; //! Access last read event virtual LHEF::HEPEUP const & hepeup() const = 0; + //! Guess number of events from header + virtual HEJ::optional<int> number_events() const { + size_t start = header().rfind("Number of Events"); + start = header().find_first_of("123456789", start); + if(start == std::string::npos) { + return {}; + } + const size_t end = header().find_first_not_of("0123456789", start); + return std::stoi(header().substr(start, end - start)); + } + virtual ~EventReader() = default; }; //! Factory function for event readers /** * @param filename The name of the input file * @returns A pointer to an instance of an EventReader * for the input file */ std::unique_ptr<EventReader> make_reader(std::string const & filename); } diff --git a/include/HEJ/HDF5Reader.hh b/include/HEJ/HDF5Reader.hh index 13bc75e..d0f67f9 100644 --- a/include/HEJ/HDF5Reader.hh +++ b/include/HEJ/HDF5Reader.hh @@ -1,44 +1,47 @@ /** \file * \brief Header file for reading events in the HDF5 event format. * * \authors The HEJ collaboration (see AUTHORS for details) * \date 2019 * \copyright GPLv2 or later */ #pragma once #include "HEJ/EventReader.hh" namespace HEJ{ //! Class for reading events from a file in the HDF5 file format /** * @details This format is specified in \cite Hoeche:2019rti. */ class HDF5Reader : public EventReader{ public: //! Contruct object reading from the given file explicit HDF5Reader(std::string const & filename); //! Read an event bool read_event() override; //! Access header text std::string const & header() const override; //! Access run information LHEF::HEPRUP const & heprup() const override; //! Access last read event LHEF::HEPEUP const & hepeup() const override; + //! Get number of events + HEJ::optional<int> number_events() const override; + private: struct HDF5ReaderImpl; struct HDF5ReaderImplDeleter { void operator()(HDF5ReaderImpl* p); }; std::unique_ptr<HDF5ReaderImpl, HDF5ReaderImplDeleter> impl_; }; } diff --git a/src/HDF5Reader.cc b/src/HDF5Reader.cc index 50305f0..457a24f 100644 --- a/src/HDF5Reader.cc +++ b/src/HDF5Reader.cc @@ -1,298 +1,305 @@ /** * \authors The HEJ collaboration (see AUTHORS for details) * \date 2019 * \copyright GPLv2 or later */ #include "HEJ/HDF5Reader.hh" #ifdef HEJ_BUILD_WITH_HDF5 #include <numeric> #include <iterator> #include "highfive/H5File.hpp" namespace HEJ { namespace { // buffer size for reader // each "reading from disk" reads "chunk_size" many event at once constexpr std::size_t chunk_size = 10000; struct ParticleData { std::vector<int> id; std::vector<int> status; std::vector<int> mother1; std::vector<int> mother2; std::vector<int> color1; std::vector<int> color2; std::vector<double> px; std::vector<double> py; std::vector<double> pz; std::vector<double> e; std::vector<double> m; std::vector<double> lifetime; std::vector<double> spin; }; struct EventRecords { std::vector<int> particle_start; std::vector<int> nparticles; std::vector<int> pid; std::vector<int> weight; std::vector<double> scale; std::vector<double> fscale; std::vector<double> rscale; std::vector<double> aqed; std::vector<double> aqcd; ParticleData particles; }; class ConstEventIterator { public: // iterator traits using iterator_category = std::bidirectional_iterator_tag; using value_type = LHEF::HEPEUP; using difference_type = std::ptrdiff_t; using pointer = const LHEF::HEPEUP*; using reference = LHEF::HEPEUP const &; using iterator = ConstEventIterator; friend iterator cbegin(EventRecords const & records) noexcept; friend iterator cend(EventRecords const & records) noexcept; iterator& operator++() { particle_offset_ += records_.get().nparticles[idx_]; ++idx_; return *this; } iterator& operator--() { --idx_; particle_offset_ -= records_.get().nparticles[idx_]; return *this; } iterator operator--(int) { auto res = *this; --(*this); return res; } bool operator==(iterator const & other) const { return idx_ == other.idx_; } bool operator!=(iterator other) const { return !(*this == other); } value_type operator*() const { value_type hepeup{}; auto const & r = records_.get(); hepeup.NUP = r.nparticles[idx_]; hepeup.IDPRUP = r.pid[idx_]; hepeup.XWGTUP = r.weight[idx_]; hepeup.weights.emplace_back(hepeup.XWGTUP, nullptr); hepeup.SCALUP = r.scale[idx_]; hepeup.scales.muf = r.fscale[idx_]; hepeup.scales.mur = r.rscale[idx_]; hepeup.AQEDUP = r.aqed[idx_]; hepeup.AQCDUP = r.aqcd[idx_]; const size_t start = particle_offset_; const size_t end = start + hepeup.NUP; auto const & p = r.particles; hepeup.IDUP = std::vector<long>( begin(p.id)+start, begin(p.id)+end ); hepeup.ISTUP = std::vector<int>( begin(p.status)+start, begin(p.status)+end ); hepeup.VTIMUP = std::vector<double>( begin(p.lifetime)+start, begin(p.lifetime)+end ); hepeup.SPINUP = std::vector<double>( begin(p.spin)+start, begin(p.spin)+end ); hepeup.MOTHUP.resize(hepeup.NUP); hepeup.ICOLUP.resize(hepeup.NUP); hepeup.PUP.resize(hepeup.NUP); for(size_t i = 0; i < hepeup.MOTHUP.size(); ++i) { const size_t idx = start + i; assert(idx < end); hepeup.MOTHUP[i] = std::make_pair(p.mother1[idx], p.mother2[idx]); hepeup.ICOLUP[i] = std::make_pair(p.color1[idx], p.color2[idx]); hepeup.PUP[i] = std::vector<double>{ p.px[idx], p.py[idx], p.pz[idx], p.e[idx], p.m[idx] }; } return hepeup; } private: explicit ConstEventIterator(EventRecords const & records): records_{records} {} std::reference_wrapper<const EventRecords> records_; size_t idx_ = 0; size_t particle_offset_ = 0; }; // end ConstEventIterator ConstEventIterator cbegin(EventRecords const & records) noexcept { return ConstEventIterator{records}; } ConstEventIterator cend(EventRecords const & records) noexcept { auto it =ConstEventIterator{records}; it.idx_ = records.aqcd.size(); // or size of any other records member return it; } } // end anonymous namespace struct HDF5Reader::HDF5ReaderImpl{ HighFive::File file; std::size_t event_idx; std::size_t particle_idx; std::size_t nevents; EventRecords records; ConstEventIterator cur_event; LHEF::HEPRUP heprup; LHEF::HEPEUP hepeup; explicit HDF5ReaderImpl(std::string const & filename): file{filename}, event_idx{0}, particle_idx{0}, nevents{ file.getGroup("event") .getDataSet("nparticles") // or any other dataset .getSpace().getDimensions().front() }, records{}, cur_event{cbegin(records)}, heprup{}, hepeup{} { read_heprup(); read_event_records(chunk_size); } void read_heprup() { const auto init = file.getGroup("init"); init.getDataSet( "PDFgroupA" ).read(heprup.PDFGUP.first); init.getDataSet( "PDFgroupB" ).read(heprup.PDFGUP.second); init.getDataSet( "PDFsetA" ).read(heprup.PDFSUP.first); init.getDataSet( "PDFsetB" ).read(heprup.PDFSUP.second); init.getDataSet( "beamA" ).read(heprup.IDBMUP.first); init.getDataSet( "beamB" ).read(heprup.IDBMUP.second); init.getDataSet( "energyA" ).read(heprup.EBMUP.first); init.getDataSet( "energyB" ).read(heprup.EBMUP.second); init.getDataSet( "numProcesses" ).read(heprup.NPRUP); init.getDataSet( "weightingStrategy" ).read(heprup.IDWTUP); const auto proc_info = file.getGroup("procInfo"); proc_info.getDataSet( "procId" ).read(heprup.LPRUP); proc_info.getDataSet( "xSection" ).read(heprup.XSECUP); proc_info.getDataSet( "error" ).read(heprup.XERRUP); // TODO: is this identification correct? proc_info.getDataSet( "unitWeight" ).read(heprup.XMAXUP); } std::size_t read_event_records(std::size_t count) { count = std::min(count, nevents-event_idx); auto events = file.getGroup("event"); events.getDataSet("nparticles").select({event_idx}, {count}).read(records.nparticles); assert(records.nparticles.size() == count); events.getDataSet("pid").select( {event_idx}, {count} ).read( records.pid ); events.getDataSet("weight").select( {event_idx}, {count} ).read( records.weight ); events.getDataSet("scale").select( {event_idx}, {count} ).read( records.scale ); events.getDataSet("fscale").select( {event_idx}, {count} ).read( records.fscale ); events.getDataSet("rscale").select( {event_idx}, {count} ).read( records.rscale ); events.getDataSet("aqed").select( {event_idx}, {count} ).read( records.aqed ); events.getDataSet("aqcd").select( {event_idx}, {count} ).read( records.aqcd ); const std::size_t particle_count = std::accumulate( begin(records.nparticles), end(records.nparticles), 0 ); auto pdata = file.getGroup("particle"); auto & particles = records.particles; pdata.getDataSet("id").select( {particle_idx}, {particle_count} ).read( particles.id ); pdata.getDataSet("status").select( {particle_idx}, {particle_count} ).read( particles.status ); pdata.getDataSet("mother1").select( {particle_idx}, {particle_count} ).read( particles.mother1 ); pdata.getDataSet("mother2").select( {particle_idx}, {particle_count} ).read( particles.mother2 ); pdata.getDataSet("color1").select( {particle_idx}, {particle_count} ).read( particles.color1 ); pdata.getDataSet("color2").select( {particle_idx}, {particle_count} ).read( particles.color2 ); pdata.getDataSet("px").select( {particle_idx}, {particle_count} ).read( particles.px ); pdata.getDataSet("py").select( {particle_idx}, {particle_count} ).read( particles.py ); pdata.getDataSet("pz").select( {particle_idx}, {particle_count} ).read( particles.pz ); pdata.getDataSet("e").select( {particle_idx}, {particle_count} ).read( particles.e ); pdata.getDataSet("m").select( {particle_idx}, {particle_count} ).read( particles.m ); pdata.getDataSet("lifetime").select( {particle_idx}, {particle_count} ).read( particles.lifetime ); pdata.getDataSet("spin").select( {particle_idx}, {particle_count} ).read( particles.spin ); event_idx += count; particle_idx += particle_count; return count; } }; HDF5Reader::HDF5Reader(std::string const & filename): impl_{ new HDF5Reader::HDF5ReaderImpl{filename}, HDF5Reader::HDF5ReaderImplDeleter{} } {} bool HDF5Reader::read_event() { if(impl_->cur_event == cend(impl_->records)) { // end of active chunk, read new events from file const auto events_read = impl_->read_event_records(chunk_size); impl_->cur_event = cbegin(impl_->records); if(events_read == 0) return false; } impl_->hepeup = *impl_->cur_event; ++impl_->cur_event; return true; } namespace { static const std::string nothing = ""; } std::string const & HDF5Reader::header() const { return nothing; } LHEF::HEPRUP const & HDF5Reader::heprup() const { return impl_->heprup; } LHEF::HEPEUP const & HDF5Reader::hepeup() const { return impl_->hepeup; } + HEJ::optional<int> HDF5Reader::number_events() const { + return impl_->nevents; + } } #else // no HDF5 support namespace HEJ { class HDF5Reader::HDF5ReaderImpl{}; HDF5Reader::HDF5Reader(std::string const &){ throw std::invalid_argument{ "Failed to create HDF5 reader: " "HEJ 2 was built without HDF5 support" }; } bool HDF5Reader::read_event() { throw std::logic_error{"unreachable"}; } std::string const & HDF5Reader::header() const { throw std::logic_error{"unreachable"}; } LHEF::HEPRUP const & HDF5Reader::heprup() const { throw std::logic_error{"unreachable"}; } LHEF::HEPEUP const & HDF5Reader::hepeup() const { throw std::logic_error{"unreachable"}; } + + HEJ::optional<int> HDF5Reader::number_events() const { + throw std::logic_error{"unreachable"}; + } } #endif namespace HEJ { void HDF5Reader::HDF5ReaderImplDeleter::operator()(HDF5ReaderImpl* p) { delete p; } } diff --git a/src/bin/HEJ.cc b/src/bin/HEJ.cc index 2472ff3..4733eb4 100644 --- a/src/bin/HEJ.cc +++ b/src/bin/HEJ.cc @@ -1,250 +1,239 @@ /** * \authors The HEJ collaboration (see AUTHORS for details) * \date 2019 * \copyright GPLv2 or later */ #include <array> #include <chrono> #include <iostream> #include <limits> #include <memory> #include <numeric> #include "yaml-cpp/yaml.h" #include "fastjet/ClusterSequence.hh" #include "HEJ/CombinedEventWriter.hh" #include "HEJ/config.hh" #include "HEJ/CrossSectionAccumulator.hh" #include "HEJ/Event.hh" #include "HEJ/EventReader.hh" #include "HEJ/EventReweighter.hh" #include "HEJ/get_analysis.hh" #include "HEJ/make_RNG.hh" #include "HEJ/optional.hh" #include "HEJ/ProgressBar.hh" #include "HEJ/stream.hh" #include "HEJ/Version.hh" #include "HEJ/YAMLreader.hh" -HEJ::optional<int> event_number(std::string const & record){ - size_t start = record.rfind("Number of Events"); - start = record.find_first_of("123456789", start); - if(start == std::string::npos) { - return {}; - } - const size_t end = record.find_first_not_of("0123456789", start); - return std::stoi(record.substr(start, end - start)); -} - HEJ::Config load_config(char const * filename){ try{ return HEJ::load_config(filename); } catch(std::exception const & exc){ std::cerr << "Error: " << exc.what() << '\n'; std::exit(EXIT_FAILURE); } } std::unique_ptr<HEJ::Analysis> get_analysis( YAML::Node const & parameters ){ try{ return HEJ::get_analysis(parameters); } catch(std::exception const & exc){ std::cerr << "Failed to load analysis: " << exc.what() << '\n'; std::exit(EXIT_FAILURE); } } // unique_ptr is a workaround: // HEJ::optional is a better fit, but gives spurious errors with g++ 7.3.0 std::unique_ptr<HEJ::ProgressBar<double>> make_progress_bar( std::vector<double> const & xs ) { if(xs.empty()) return {}; const double Born_xs = std::accumulate(begin(xs), end(xs), 0.); return std::make_unique<HEJ::ProgressBar<double>>(std::cout, Born_xs); } std::string time_to_string(const time_t time){ char s[30]; struct tm * p = localtime(&time); strftime(s, 30, "%a %b %d %Y %H:%M:%S", p); return s; } int main(int argn, char** argv) { using clock = std::chrono::system_clock; if (argn < 3) { std::cerr << "\n# Usage:\n."<< argv[0] <<" config_file input_file\n\n"; return EXIT_FAILURE; } const auto start_time = clock::now(); { std::cout << "Starting " << HEJ::Version::package_name_full() << ", revision " << HEJ::Version::revision() << " (" << time_to_string(clock::to_time_t(start_time)) << ")" << std::endl; } fastjet::ClusterSequence::print_banner(); // read configuration const HEJ::Config config = load_config(argv[1]); auto reader = HEJ::make_reader(argv[2]); assert(reader); std::unique_ptr<HEJ::Analysis> analysis = get_analysis( config.analysis_parameters ); assert(analysis != nullptr); auto heprup{ reader->heprup() }; heprup.generators.emplace_back(LHEF::XMLTag{}); heprup.generators.back().name = HEJ::Version::package_name(); heprup.generators.back().version = HEJ::Version::String(); analysis->initialise(heprup); HEJ::CombinedEventWriter writer{config.output, std::move(heprup)}; double global_reweight = 1.; int max_events = std::numeric_limits<int>::max(); // if we need the event number: if(std::abs(heprup.IDWTUP) == 4 || std::abs(heprup.IDWTUP) == 1 || argn > 3){ // try to read from LHE head - auto input_events{event_number(reader->header())}; + auto input_events{reader->number_events()}; if(!input_events) { // else count manually auto t_reader = HEJ::make_reader(argv[2]); input_events = 0; while(t_reader->read_event()) ++(*input_events); } if(std::abs(heprup.IDWTUP) == 4 || std::abs(heprup.IDWTUP) == 1){ // IDWTUP 4 or 1 assume average(weight)=xs, but we need sum(weights)=xs std::cout << "Found IDWTUP " << heprup.IDWTUP << ": " << "assuming \"cross section = average weight\".\n" << "converting to \"cross section = sum of weights\" "; global_reweight /= *input_events; } if(argn > 3){ // maximal number of events given max_events = std::stoi(argv[3]); global_reweight = *input_events/static_cast<double>(max_events); std::cout << "Processing " << max_events << " out of " << *input_events << " events\n"; } } HEJ::ScaleGenerator scale_gen{ config.scales.base, config.scales.factors, config.scales.max_ratio }; auto ran = HEJ::make_RNG(config.rng.name, config.rng.seed); assert(ran != nullptr); HEJ::EventReweighter hej{ reader->heprup(), std::move(scale_gen), to_EventReweighterConfig(config), *ran }; // status infos & eye candy int nevent = 0; std::array<int, HEJ::event_type::last_type + 1> nevent_type{0}, nfailed_type{0}; auto progress = make_progress_bar(reader->heprup().XSECUP); HEJ::CrossSectionAccumulator xs; std::map<HEJ::StatusCode, int> status_counter; size_t total_trials = 0; // Loop over the events in the input file - while(reader->read_event()){ + while(reader->read_event() && nevent < max_events){ + ++nevent; + // reweight events so that the total cross section is conserved auto hepeup = reader->hepeup(); hepeup.setWeight(0, global_reweight * hepeup.weight()); - if(nevent == max_events) break; - ++nevent; - HEJ::Event::EventData event_data{hepeup}; event_data.reconstruct_intermediate(); // calculate HEJ weight HEJ::Event FO_event{ std::move(event_data).cluster( config.fixed_order_jets.def, config.fixed_order_jets.min_pt ) }; if(FO_event.central().weight == 0) { static const bool warned_once = [argv,nevent](){ std::cerr << "WARNING: event number " << nevent << " in " << argv[2] << " has zero weight. " "Ignoring this and all further events with vanishing weight.\n"; return true; }(); (void) warned_once; // shut up compiler warnings continue; } auto resummed_events = hej.reweight(FO_event, config.trials); for(auto const & s: hej.status()) ++status_counter[s]; total_trials+=hej.status().size(); ++nevent_type[FO_event.type()]; if(resummed_events.empty()) ++nfailed_type[FO_event.type()]; for(auto const & ev: resummed_events){ //TODO: move pass_cuts to after phase space point generation if(analysis->pass_cuts(ev, FO_event)){ analysis->fill(ev, FO_event); writer.write(ev); xs.fill(ev); } } if(progress) progress->increment(FO_event.central().weight); } // main event loop std::cout << '\n'; analysis->finalise(); using namespace HEJ::event_type; std::cout<< "Events processed: " << nevent << '\n'; std::cout << '\t' << name(EventType::first_type) << ": " << nevent_type[EventType::first_type] << ", failed to reconstruct " << nfailed_type[EventType::first_type] << '\n'; for(auto i=EventType::first_type+1; i<=EventType::last_type; i*=2){ std::cout << '\t' << name(static_cast<EventType>(i)) << ": " << nevent_type[i] << ", failed to reconstruct " << nfailed_type[i] << '\n'; } std::cout << '\n' << xs << '\n'; std::cout << "Generation statistic: " << status_counter[HEJ::StatusCode::good] << "/" << total_trials << " trials successful.\n"; for(auto && entry: status_counter){ const double fraction = static_cast<double>(entry.second)/total_trials; const int percent = std::round(100*fraction); std::cout << std::left << std::setw(17) << (to_string(entry.first) + ":") << " ["; for(int i = 0; i < percent/2; ++i) std::cout << '#'; for(int i = percent/2; i < 50; ++i) std::cout << ' '; std::cout << "] " <<std::setw(2)<<std::right<< percent << "%\n"; } std::chrono::duration<double> run_time = (clock::now() - start_time); std::cout << "\nFinished " << HEJ::Version::package_name() << " at " << time_to_string(clock::to_time_t(clock::now())) << "\n=> Runtime: " << run_time.count() << " sec (" << nevent/run_time.count() << " Events/sec).\n"; }