LArSoft  v09_90_00
Liquid Argon Software toolkit - https://larsoft.org/
MixHelper.cc
Go to the documentation of this file.
6 #include "cetlib/container_algorithms.h"
8 #include "range/v3/view.hpp"
9 
10 #include <algorithm>
11 #include <cassert>
12 #include <functional>
13 #include <limits>
14 #include <numeric>
15 #include <ostream>
16 #include <random>
17 #include <regex>
18 #include <unordered_set>
19 
20 using namespace ::ranges;
21 using namespace std::string_literals;
22 
23 namespace {
24  bool
25  event_entries(art::FileIndex::Element const& element)
26  {
27  return element.getEntryType() == art::FileIndex::kEvent;
28  }
29 
30  bool
31  only_events(std::unique_ptr<art::MixOpBase> const& mixOp)
32  {
33  return mixOp->branchType() == art::InEvent;
34  }
35 
// Builds a lookup table from entry number to event ID, using only the
// FileIndex elements accepted by event_entries().
// NOTE(review): the line carrying the return type appears to be missing from
// this listing; presumably art::EventIDIndex (the type of `result`) -- confirm
// against the original source.
37  buildEventIDIndex(art::FileIndex const& fileIndex)
38  {
39  art::EventIDIndex result;
// Non-event entries are filtered out before insertion.
40  for (auto const& element : fileIndex | views::filter(event_entries)) {
// try_emplace keeps an already-present mapping for the same entry number.
41  result.try_emplace(element.entry, element.eventID);
42  }
43  return result;
44  }
45 
// Builds the product-ID translation map: for every mix operation accepted by
// only_events(), maps its incoming product ID to its outgoing product ID.
// NOTE(review): the return-type line and the declaration of `transMap` appear
// to be missing from this listing -- confirm against the original source.
47  buildProductIDTransMap(art::MixOpList const& mixOps)
48  {
50  for (auto const& mixOp : mixOps | views::filter(only_events)) {
// Assignment through operator[]: a later operation with the same incoming ID
// replaces the earlier mapping.
51  transMap[mixOp->incomingProductID()] = mixOp->outgoingProductID();
52  }
53  return transMap;
54  }
55 
// Function object that translates an entry number into the value stored for
// it in an art::EventIDIndex.
56  class EventIDLookup {
57  public:
// Keeps a reference (not a copy) to `index`; the index must outlive this
// object.
58  explicit EventIDLookup(art::EventIDIndex const& index) : index_{index} {}
59 
// Looks `entry` up in the index and returns the mapped value.
// NOTE(review): the return-type line and the first line of the statement
// inside the `if` appear to be missing from this listing; presumably an
// exception is thrown when the entry is absent -- confirm. Without that throw,
// `i->second` below would dereference the end iterator.
61  operator()(art::FileIndex::EntryNumber_t const entry) const
62  {
63  auto i = index_.find(entry);
64  if (i == cend(index_)) {
66  << "MixHelper could not find entry number " << entry
67  << " in its own lookup table.\n";
68  }
69  return i->second;
70  }
71 
72  private:
// Non-owning reference to the lookup table supplied at construction.
73  art::EventIDIndex const& index_;
74  };
75 
76  double
77  initCoverageFraction(double fraction)
78  {
79  if (fraction > (1 + std::numeric_limits<double>::epsilon())) {
80  mf::LogWarning("Configuration")
81  << "coverageFraction > 1: treating as a percentage.";
82  fraction /= 100.0;
83  }
84  return fraction;
85  }
86 
87 } // namespace
88 
// MixHelper constructor that reads its settings from `pset`.
// NOTE(review): the first line of the signature and one member initializer
// (listing line 104) appear to be missing from this listing.
// Defaults visible below: fileNames = {}, compactMissingProducts = false,
// readMode = "sequential", coverageFraction = 1.0, wrapFiles = false,
// seed = -1.
90  std::string const& moduleLabel,
91  ProducesCollector& collector,
92  std::unique_ptr<MixIOPolicy> ioHandle)
93  : EngineCreator{moduleLabel, ScheduleID::first()}
94  , collector_{collector}
95  , moduleLabel_{moduleLabel}
96  , filenames_{pset.get<std::vector<std::string>>("fileNames", {})}
97  , compactMissingProducts_{pset.get<bool>("compactMissingProducts", false)}
// Iteration over the secondary files starts at the first configured name.
98  , fileIter_{filenames_.begin()}
99  , readMode_{initReadMode_(pset.get<std::string>("readMode", "sequential"))}
// initCoverageFraction() rescales values above 1 as percentages.
100  , coverageFraction_{initCoverageFraction(
101  pset.get<double>("coverageFraction", 1.0))}
102  , canWrapFiles_{pset.get<bool>("wrapFiles", false)}
// initEngine_() is passed the already-initialised readMode_.
103  , engine_{initEngine_(pset.get<long>("seed", -1), readMode_)}
// Takes ownership of the I/O policy object.
105  , ioHandle_{std::move(ioHandle)}
106 {}
107 
// MixHelper constructor that reads its settings from a `config` object
// through its accessor functions.
// NOTE(review): the first line of the signature and one member initializer
// (listing line 122) appear to be missing from this listing.
109  std::string const& moduleLabel,
110  ProducesCollector& collector,
111  std::unique_ptr<MixIOPolicy> ioHandle)
112  : EngineCreator{moduleLabel, ScheduleID::first()}
113  , collector_{collector}
114  , moduleLabel_{moduleLabel}
115  , filenames_{config.filenames()}
116  , compactMissingProducts_{config.compactMissingProducts()}
// Iteration over the secondary files starts at the first configured name.
117  , fileIter_{filenames_.begin()}
118  , readMode_{initReadMode_(config.readMode())}
// initCoverageFraction() rescales values above 1 as percentages.
119  , coverageFraction_{initCoverageFraction(config.coverageFraction())}
120  , canWrapFiles_{config.wrapFiles()}
// initEngine_() is passed the already-initialised readMode_.
121  , engine_{initEngine_(config.seed(), readMode_)}
// Takes ownership of the I/O policy object.
123  , ioHandle_{std::move(ioHandle)}
124 {}
125 
// Defaulted destructor, defined here in the implementation file rather than
// in the class body.
126 art::MixHelper::~MixHelper() = default;
127 
// Streams a textual name for a MixHelper::Mode value and returns `os`.
// NOTE(review): the `case` labels (listing lines 132, 134, 136, 138, 140)
// appear to be missing from this listing; each `return` below presumably
// belongs to the enumerator with the matching name -- confirm.
128 std::ostream&
129 art::operator<<(std::ostream& os, MixHelper::Mode const mode)
130 {
131  switch (mode) {
133  return os << "SEQUENTIAL";
135  return os << "RANDOM_REPLACE";
137  return os << "RANDOM_LIM_REPLACE";
139  return os << "RANDOM_NO_REPLACE";
141  return os << "UNKNOWN";
142  // No default so compiler can warn.
143  }
// Reached only if `mode` matches none of the cases; nothing is written.
144  return os;
145 }
146 
// Installs `func` as the secondary file name provider.
// NOTE(review): the signature line (listing line 148) and the first line of
// the statement inside the `if` (listing line 151) appear to be missing from
// this listing; presumably an exception is thrown when file names were also
// configured -- confirm.
147 void
149 {
// A provider and a non-empty file name list are mutually exclusive.
150  if (!filenames_.empty()) {
152  << "Provision of a secondary file name provider is incompatible"
153  << " with a\nnon-empty fileNames parameter to the mix filter.\n";
154  }
155  providerFunc_ = func;
156 }
157 
// Body of the single-argument createEngine overload.
// NOTE(review): the signature (listing lines 158-159), the call whose
// arguments appear on the next lines (listing line 162) and the fall-through
// statement (listing line 167) appear to be missing from this listing.
// Presumably the call is consistentRequest_() and the fall-through delegates
// to the base-class createEngine -- confirm.
160 {
// Reuse the engine this helper already holds when the request is compatible.
161  if (engine_ &&
163  ServiceHandle<RandomNumberGenerator const>()->defaultEngineKind(),
164  ""s)) {
165  return *engine_;
166  }
168 }
169 
// createEngine overload taking a seed and an engine kind.
// NOTE(review): the first lines of the signature (listing lines 170-171)
// appear to be missing from this listing.
172  std::string const& kind_of_engine_to_make)
173 {
// Reuse the engine this helper already holds when the request (with an empty
// label) is consistent with it; consistentRequest_() decides that.
174  if (engine_ && consistentRequest_(kind_of_engine_to_make, ""s)) {
175  return *engine_;
176  }
// Otherwise delegate to the base-class implementation.
177  return detail::EngineCreator::createEngine(seed, kind_of_engine_to_make);
178 }
179 
// createEngine overload taking a seed, an engine kind and an engine label.
// NOTE(review): the first lines of the signature (listing lines 180-181) and
// the start of the final return statement (listing line 188) appear to be
// missing from this listing; presumably it delegates to the base-class
// createEngine with the arguments shown -- confirm.
182  std::string const& kind_of_engine_to_make,
183  label_t const& engine_label)
184 {
// Reuse the engine this helper already holds when consistentRequest_()
// accepts the requested kind and label.
185  if (engine_ && consistentRequest_(kind_of_engine_to_make, engine_label)) {
186  return *engine_;
187  }
189  seed, kind_of_engine_to_make, engine_label);
190 }
191 
// Chooses which secondary-file entries to mix for the current event.
//
// nSecondaries: number of secondary entries requested.
// enSeq:        output, filled with the chosen entry numbers; must be empty
//               on entry (asserted).
// eIDseq:       output, filled with the event ID for each entry in enSeq;
//               must be empty on entry (asserted).
// Returns true when the sequences were filled, false when no (further)
// secondary file could be opened.
//
// NOTE(review): several lines appear to be missing from this listing (listing
// lines 205, 211, 213, 233, 239, 246, 264, 269): the condition of the
// `over_threshold` ternary, statements around the nOpensOverThreshold_ check,
// three `case` labels and the starts of two throw statements. Comments below
// that depend on them are marked as assumptions.
192 bool
193 art::MixHelper::generateEventSequence(size_t const nSecondaries,
194  EntryNumberSequence& enSeq,
195  EventIDSequence& eIDseq)
196 {
197  assert(enSeq.empty());
198  assert(eIDseq.empty());
// Make sure a secondary file is open; give up if none can be opened.
199  if (not ioHandle_->fileOpen() and not openNextFile_()) {
200  return false;
201  }
202 
203  auto const nEventsInFile = ioHandle_->nEventsInFile();
// Two alternative limits: the full file size, or the file size scaled by
// coverageFraction_. NOTE(review): the selecting condition is not visible.
204  bool const over_threshold =
206  ((nEventsReadThisFile_ + nSecondaries) > nEventsInFile) :
207  ((nEventsReadThisFile_ + nSecondaries) >
208  (nEventsInFile * coverageFraction_));
209  if (over_threshold) {
// Only checked when file names come from the configured list, not from a
// provider function.
210  if (!providerFunc_) {
// NOTE(review): presumably nOpensOverThreshold_ is incremented on the missing
// line before this check, and the block below throws -- confirm.
212  if (nOpensOverThreshold_ > filenames_.size()) {
214  "An error occurred while preparing product-mixing for "
215  "the current event.\n"}
216  << "The number of requested secondaries (" << nSecondaries
217  << ") exceeds the number of events in any\n"
218  << "of the files specified for product mixing. For a read mode of '"
219  << readMode_ << "',\n"
220  << "the framework does not currently allow product-mixing to span "
221  "multiple secondary\n"
222  << "input files for a given event. Please contact artists@fnal.gov "
223  "for more information.\n";
224  }
225  }
// Move on to the next file and retry the whole selection against it.
226  if (openNextFile_()) {
227  return generateEventSequence(nSecondaries, enSeq, eIDseq);
228  } else {
229  return false;
230  }
231  }
232 
234  switch (readMode_) {
// Consecutive entries, starting right after those already read from this file.
235  case Mode::SEQUENTIAL:
236  enSeq.resize(nSecondaries);
237  std::iota(begin(enSeq), end(enSeq), nEventsReadThisFile_);
238  break;
// NOTE(review): case label missing; this branch draws nSecondaries values via
// dist_->fireInt(nEventsInFile) (duplicates possible) and sorts them.
240  std::generate_n(
241  std::back_inserter(enSeq), nSecondaries, [this, nEventsInFile] {
242  return dist_.get()->fireInt(nEventsInFile);
243  });
244  std::sort(enSeq.begin(), enSeq.end());
245  break;
// NOTE(review): case label missing; this branch keeps drawing until it has
// nSecondaries distinct entry numbers, then sorts them.
247  std::unordered_set<EntryNumberSequence::value_type>
248  entries; // Guaranteed unique.
249  while (entries.size() < nSecondaries) {
250  std::generate_n(
251  std::inserter(entries, entries.begin()),
252  nSecondaries - entries.size(),
253  [this, nEventsInFile] { return dist_.get()->fireInt(nEventsInFile); });
254  }
255  enSeq.assign(cbegin(entries), cend(entries));
256  std::sort(begin(enSeq), end(enSeq));
257  // Since we need to sort at the end anyway, it's unclear whether
258  // unordered_set is faster than set even though inserts are
259  // approximately linear time. Since the complexity of the sort is
260  // NlogN, we'd need a profile run for it all to come out in the
261  // wash.
262  assert(enSeq.size() == nSecondaries); // Should be true by construction.
263  } break;
// NOTE(review): case label missing; this branch copies the next nSecondaries
// entries out of the pre-shuffled sequence built in openNextFile_().
265  auto i = shuffledSequence_.cbegin() + nEventsReadThisFile_;
266  enSeq.assign(i, i + nSecondaries);
267  } break;
268  default:
// NOTE(review): start of the statement missing; presumably a throw -- confirm.
270  << "Unrecognized read mode " << static_cast<int>(readMode_)
271  << ". Contact the art developers.\n";
272  }
// Translate every chosen entry number into its event ID.
273  cet::transform_all(
274  enSeq, back_inserter(eIDseq), EventIDLookup{eventIDIndex_});
275  return true;
276 }
277 
// Body of generateEventAuxiliarySequence: forwards `enSeq` to the I/O policy
// object and returns its result.
// NOTE(review): the signature lines (listing lines 278-279) appear to be
// missing from this listing.
280 {
281  return ioHandle_->generateEventAuxiliarySequence(enSeq);
282 }
283 
// File-local, default-constructed PtrRemapper. mixAndPut() passes it for
// sub-run and run products, where Ptr remapping is not supported.
284 namespace {
285  art::PtrRemapper const nopRemapper{};
286 }
287 
// Reads the selected secondary products, mixes them and puts the results into
// event `e`, one registered mix operation at a time.
//
// eIDseq: event IDs of the selected secondary events; used to find the
//         matching sub-run and run entries.
// e:      the event receiving the mixed products.
// Throws Exception(errors::NotFound) when a sub-run or run entry cannot be
// found, and Exception(errors::LogicError) for an unsupported branch type.
//
// NOTE(review): the signature line declaring `eventEntries` (listing line 289)
// and the statement following the "Populate the remapper" comment (listing
// line 325) appear to be missing from this listing.
288 void
290  EventIDSequence const& eIDseq,
291  Event& e)
292 {
293  // Create required info only if we're likely to need it.
294  EntryNumberSequence subRunEntries;
295  EntryNumberSequence runEntries;
296  auto const& fileIndex = ioHandle_->fileIndex();
297  if (haveSubRunMixOps_) {
298  subRunEntries.reserve(eIDseq.size());
// One sub-run entry per selected event, in the same order as eIDseq.
299  for (auto const& eID : eIDseq) {
300  auto const it = fileIndex.findPosition(eID.subRunID(), true);
301  if (it == std::cend(fileIndex)) {
302  throw Exception(errors::NotFound, "NO_SUBRUN")
303  << "- Unable to find an entry in the SubRun tree corresponding to "
304  "event ID "
305  << eID << " in secondary mixing input file " << *fileIter_ << ".\n";
306  }
307  subRunEntries.emplace_back(it->entry);
308  }
309  }
310  if (haveRunMixOps_) {
311  runEntries.reserve(eIDseq.size());
// One run entry per selected event, in the same order as eIDseq.
312  for (auto const& eID : eIDseq) {
313  auto const it = fileIndex.findPosition(eID.runID(), true);
314  if (it == std::cend(fileIndex)) {
315  throw Exception(errors::NotFound, "NO_RUN")
316  << "- Unable to find an entry in the Run tree corresponding to "
317  "event ID "
318  << eID << " in secondary mixing input file " << *fileIter_ << ".\n";
319  }
320  runEntries.emplace_back(it->entry);
321  }
322  }
323 
324  // Populate the remapper in case we need to remap any Ptrs.
326 
327  // Do the branch-wise read, mix and put.
328  for (auto const& op : mixOps_) {
329  switch (op->branchType()) {
330  case InEvent: {
331  auto const inProducts = ioHandle_->readFromFile(*op, eventEntries);
// Event products are the only ones mixed with the real Ptr remapper.
332  op->mixAndPut(e, inProducts, ptrRemapper_);
333  continue;
334  }
335  case InSubRun: {
336  auto const inProducts = ioHandle_->readFromFile(*op, subRunEntries);
337  // Ptrs not supported for subrun product mixing.
338  op->mixAndPut(e, inProducts, nopRemapper);
339  continue;
340  }
341  case InRun: {
342  auto const inProducts = ioHandle_->readFromFile(*op, runEntries);
343  // Ptrs not supported for run product mixing.
344  op->mixAndPut(e, inProducts, nopRemapper);
345  continue;
346  }
347  default:
348  throw Exception(errors::LogicError, "Unsupported BranchType")
349  << "- MixHelper::mixAndPut() attempted to handle unsupported branch "
350  "type "
351  << op->branchType() << ".\n";
352  }
353  }
354 
// Advance both the per-file and the overall read counters.
355  nEventsReadThisFile_ += eventEntries.size();
356  totalEventsRead_ += eventEntries.size();
357 }
358 
359 void
360 art::MixHelper::setEventsToSkipFunction(std::function<size_t()> eventsToSkip)
361 {
362  eventsToSkip_ = eventsToSkip;
363 }
364 
// Translates the textual `readMode` configuration value into a Mode
// enumerator by matching it, case-insensitively, against one regular
// expression per mode. The index of the first matching expression is
// converted to Mode.
// NOTE(review): the start of the final statement (listing line 383) appears to
// be missing from this listing; presumably an exception is thrown when nothing
// matches -- confirm.
365 auto
366 art::MixHelper::initReadMode_(std::string const& mode) const -> Mode
367 {
368  // These regexes must correspond by index to the valid Mode enumerator
369  // values.
370  static std::regex const robjs[4]{
// No trailing '$' on this pattern: with regex_search, any value that merely
// starts with "seq" selects index 0.
371  std::regex("^seq", std::regex_constants::icase),
372  std::regex("^random(replace)?$", std::regex_constants::icase),
373  std::regex("^randomlimreplace$", std::regex_constants::icase),
374  std::regex("^randomnoreplace$", std::regex_constants::icase)};
375  int i{0};
376  for (auto const& r : robjs) {
377  if (std::regex_search(mode, r)) {
// Relies on the enumerator values following the order of `robjs`.
378  return Mode(i);
379  } else {
380  ++i;
381  }
382  }
384  << "Unrecognized value of readMode parameter: \"" << mode
385  << "\". Valid values are:\n"
386  << " sequential,\n"
387  << " randomReplace (random is accepted for reasons of legacy),\n"
388  << " randomLimReplace,\n"
389  << " randomNoReplace.\n";
390 }
391 
// Opens the next secondary input file and rebuilds the per-file lookup state.
// Returns false when no further file name is available (the provider returned
// an empty name, no names are configured, or the list is exhausted and
// wrapping is disabled); returns true after a file has been opened.
//
// NOTE(review): the signature line (listing line 393) and listing lines 419,
// 426 and 428 appear to be missing from this listing; comments that depend on
// them are marked as assumptions.
392 bool
394 {
395  std::string filename;
// A registered provider function takes precedence over the configured list.
396  if (providerFunc_) {
397  filename = providerFunc_();
398  if (filename.empty()) {
399  return false;
400  }
401  } else if (filenames_.empty()) {
402  return false;
403  } else {
// Only advance once a file has been opened, so the first call opens the
// first configured file.
404  if (ioHandle_->fileOpen()) { // Already seen one file.
405  ++fileIter_;
406  }
407  if (fileIter_ == filenames_.end()) {
408  if (canWrapFiles_) {
409  mf::LogWarning("MixingInputWrap")
410  << "Wrapping around to initial input file for mixing after "
411  << totalEventsRead_ << " secondary events read.";
412  fileIter_ = filenames_.begin();
413  } else {
414  return false;
415  }
416  }
417  filename = *fileIter_;
418  }
// NOTE(review): the start of this statement is missing; presumably it resets
// nEventsReadThisFile_ to eventsToSkip_() when that callback is set, else 0.
420  eventsToSkip_() :
421  0; // Reset for this file.
422  ioHandle_->openAndReadMetaData(filename, mixOps_);
423 
424  eventIDIndex_ = buildEventIDIndex(ioHandle_->fileIndex());
// NOTE(review): `transMap` is presumably consumed on the missing line that
// follows (listing line 426) -- confirm.
425  auto transMap = buildProductIDTransMap(mixOps_);
427 
// NOTE(review): the line opening this block (listing line 428) is missing;
// presumably it is guarded by a read-mode check -- confirm.
429  // Prepare shuffled event sequence.
430  shuffledSequence_.resize(ioHandle_->nEventsInFile());
431  std::iota(shuffledSequence_.begin(), shuffledSequence_.end(), 0);
// NOTE(review): the shuffle is seeded from std::random_device, not from
// engine_, so the resulting order does not depend on the configured seed.
432  std::random_device rd;
433  std::mt19937 g{rd()};
434  std::shuffle(shuffledSequence_.begin(), shuffledSequence_.end(), g);
435  }
436 
437  return true;
438 }
439 
// Checks whether a request for a random number engine can be satisfied by the
// engine this helper already created. Returns true (after logging an info
// message) when the requested kind equals the RandomNumberGenerator service's
// default engine kind and the label is empty.
// NOTE(review): the start of the final statement (listing line 452) appears to
// be missing from this listing; presumably an exception is thrown for any
// other request -- confirm.
440 bool
441 art::MixHelper::consistentRequest_(std::string const& kind_of_engine_to_make,
442  label_t const& engine_label) const
443 {
444  auto const& default_engine_kind =
445  ServiceHandle<RandomNumberGenerator const>()->defaultEngineKind();
446  if (kind_of_engine_to_make == default_engine_kind && engine_label.empty()) {
447  mf::LogInfo{"RANDOM"} << "A random number engine has already been created "
448  "since the read mode is "
449  << readMode_ << '.';
450  return true;
451  }
453  "An error occurred while creating a random number engine "
454  "within a MixFilter detail class.\n"}
455  << "A random number engine with an empty label has already been created "
456  "with an engine type of "
457  << default_engine_kind << ".\n"
458  << "If you would like to use a different engine type, please supply a "
459  "different engine label.\n";
460 }
461 
// Creates the random number engine when the read mode needs one.
// Returns a non-owning pointer to the new engine for any mode that compares
// greater than Mode::SEQUENTIAL, and a null pointer otherwise.
// Throws Exception(errors::Configuration) when such a mode is selected but the
// RandomNumberGenerator service is not available.
// NOTE(review): the signature line naming the function and its parameters
// (listing line 463) appears to be missing from this listing.
462 cet::exempt_ptr<art::MixHelper::base_engine_t>
464 {
465  using namespace art;
// Relies on SEQUENTIAL being ordered before every other mode tested here.
466  if (readMode > MixHelper::Mode::SEQUENTIAL) {
467  if (ServiceRegistry::isAvailable<RandomNumberGenerator>()) {
468  return cet::make_exempt_ptr(&detail::EngineCreator::createEngine(seed));
469  }
470  throw Exception{errors::Configuration, "MixHelper"}
471  << "Random event mixing selected but RandomNumberGenerator service "
472  "not loaded.\n"
473  << "Ensure service is loaded with: \n"
474  << "services.RandomNumberGenerator: {}\n";
475  }
476  return nullptr;
477 }
478 
479 std::unique_ptr<CLHEP::RandFlat>
480 art::MixHelper::initDist_(cet::exempt_ptr<base_engine_t> const engine) const
481 {
482  return engine ? std::make_unique<CLHEP::RandFlat>(*engine) : nullptr;
483 }
TRandom r
Definition: spectrum.C:23
base_engine_t & createEngine(seed_t seed)
void registerSecondaryFileNameProvider(ProviderFunc_ func)
Definition: MixHelper.cc:148
std::vector< std::string >::const_iterator fileIter_
Definition: MixHelper.h:402
std::vector< EventAuxiliary > EventAuxiliarySequence
Definition: MixTypes.h:28
std::function< size_t()> eventsToSkip_
Definition: MixHelper.h:412
std::vector< EventID > EventIDSequence
Definition: MixTypes.h:26
decltype(auto) constexpr cend(T &&obj)
ADL-aware version of std::cend.
Definition: StdUtils.h:93
ProviderFunc_ providerFunc_
Definition: MixHelper.h:399
Mode initReadMode_(std::string const &mode) const
Definition: MixHelper.cc:366
cet::exempt_ptr< base_engine_t > initEngine_(seed_t seed, Mode readMode)
Definition: MixHelper.cc:463
void mixAndPut(EntryNumberSequence const &enSeq, EventIDSequence const &eIDseq, Event &e)
Definition: MixHelper.cc:289
void setEventsToSkipFunction(std::function< size_t()> eventsToSkip)
Definition: MixHelper.cc:360
bool consistentRequest_(std::string const &kind_of_engine_to_make, label_t const &engine_label) const
Definition: MixHelper.cc:441
std::size_t totalEventsRead_
Definition: MixHelper.h:406
RNGsnapshot::label_t label_t
Definition: EngineCreator.h:37
std::map< ProductID, ProductID > ProductIDTransMap
MixOpList mixOps_
Definition: MixHelper.h:400
bool haveSubRunMixOps_
Definition: MixHelper.h:414
static constexpr ScheduleID first()
Definition: ScheduleID.h:50
unsigned nOpensOverThreshold_
Definition: MixHelper.h:408
EntryNumberSequence shuffledSequence_
Definition: MixHelper.h:413
std::vector< std::string > const filenames_
Definition: MixHelper.h:397
long long EntryNumber_t
Definition: FileIndex.h:41
base_engine_t & createEngine(seed_t seed)
Definition: MixHelper.cc:159
std::unique_ptr< CLHEP::RandFlat > dist_
Definition: MixHelper.h:411
double const coverageFraction_
Definition: MixHelper.h:404
Mode const readMode_
Definition: MixHelper.h:403
decltype(auto) constexpr end(T &&obj)
ADL-aware version of std::end.
Definition: StdUtils.h:77
bool generateEventSequence(size_t nSecondaries, EntryNumberSequence &enSeq, EventIDSequence &eIDseq)
Definition: MixHelper.cc:193
std::ostream & operator<<(std::ostream &os, const GroupSelector &gs)
bool const canWrapFiles_
Definition: MixHelper.h:407
virtual ProductID incomingProductID() const =0
virtual ProductID outgoingProductID() const =0
ProducesCollector & collector_
Definition: MixHelper.h:395
long seed
Definition: chem4.cc:67
bool openNextFile_()
Definition: MixHelper.cc:393
bool compactMissingProducts_
Definition: MixHelper.h:398
void prepareTranslationTables(ProductIDTransMap &transMap)
std::unique_ptr< MixIOPolicy > ioHandle_
Definition: MixHelper.h:418
MixHelper(Config const &config, std::string const &moduleLabel, ProducesCollector &collector, std::unique_ptr< MixIOPolicy > ioHandle)
Definition: MixHelper.cc:108
CLHEP::HepRandomEngine base_engine_t
Definition: EngineCreator.h:36
const_iterator findPosition(EventID const &eID) const
Definition: FileIndex.cc:227
std::string const moduleLabel_
Definition: MixHelper.h:396
std::vector< FileIndex::EntryNumber_t > EntryNumberSequence
Definition: MixTypes.h:27
cet::exempt_ptr< base_engine_t > engine_
Definition: MixHelper.h:410
PtrRemapper ptrRemapper_
Definition: MixHelper.h:401
EntryType getEntryType() const
Definition: FileIndex.cc:43
std::size_t nEventsReadThisFile_
Definition: MixHelper.h:405
cet::coded_exception< errors::ErrorCodes, ExceptionDetail::translate > Exception
Definition: Exception.h:66
std::function< std::string()> ProviderFunc_
Definition: MixHelper.h:246
std::map< FileIndex::EntryNumber_t, EventID > EventIDIndex
Definition: MixTypes.h:23
Mode readMode() const
Definition: MixHelper.h:425
bool haveRunMixOps_
Definition: MixHelper.h:415
EventAuxiliarySequence generateEventAuxiliarySequence(EntryNumberSequence const &)
Definition: MixHelper.cc:279
EventIDIndex eventIDIndex_
Definition: MixHelper.h:416
MaybeLogger_< ELseverityLevel::ELsev_warning, false > LogWarning
decltype(auto) constexpr cbegin(T &&obj)
ADL-aware version of std::cbegin.
Definition: StdUtils.h:85
Definition: MVAAlg.h:12
PtrRemapper getRemapper(Event const &e) const
decltype(auto) constexpr begin(T &&obj)
ADL-aware version of std::begin.
Definition: StdUtils.h:69
std::vector< std::unique_ptr< MixOpBase >> MixOpList
Definition: MixIOPolicy.h:22
Float_t e
Definition: plot.C:35
std::unique_ptr< CLHEP::RandFlat > initDist_(cet::exempt_ptr< base_engine_t > engine) const
Definition: MixHelper.cc:480
ProdToProdMapBuilder ptpBuilder_
Definition: MixHelper.h:409
virtual BranchType branchType() const =0
EntryNumber_t entry
Definition: FileIndex.h:60