LArSoft  v07_13_02
Liquid Argon Software toolkit - http://larsoft.org/
MixHelper.cc
Go to the documentation of this file.
2 
6 
14 #include "canvas_root_io/Streamers/ProductIDStreamer.h"
15 #include "cetlib/container_algorithms.h"
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <functional>
21 #include <limits>
22 #include <numeric>
23 #include <regex>
24 #include <unordered_set>
25 
26 #include "Rtypes.h"
27 
28 namespace {
29 
31  buildEventIDIndex(art::FileIndex const& fileIndex)
32  {
33  art::EventIDIndex result;
34  for (auto const& element : fileIndex) {
35  if (element.getEntryType() != art::FileIndex::kEvent)
36  continue;
37  result.emplace(element.entry_, element.eventID_);
38  }
39  return result;
40  }
41 
42  class EventIDLookup : public std::unary_function<Long64_t, art::EventID> {
43  public:
44  EventIDLookup(art::EventIDIndex const& index);
45  result_type operator()(argument_type entry) const;
46 
47  private:
48  art::EventIDIndex const& index_;
49  };
50 
51  std::array<cet::exempt_ptr<TTree>, art::NumBranchTypes>
52  initDataTrees(cet::value_ptr<TFile> const& currentFile)
53  {
54  std::array<cet::exempt_ptr<TTree>, art::NumBranchTypes> result;
55  for (auto bt = 0; bt != art::NumBranchTypes; ++bt) {
56  result[bt].reset(static_cast<TTree*>(currentFile->Get(
57  art::rootNames::dataTreeName(static_cast<art::BranchType>(bt))
58  .c_str())));
59  if (result[bt].get() == nullptr and bt != art::InResults) {
61  << "Unable to read event tree from secondary event stream file "
62  << currentFile->GetName() << ".\n";
63  }
64  }
65  return result;
66  }
67 } // namespace
68 
69 inline EventIDLookup::EventIDLookup(art::EventIDIndex const& index)
70  : index_(index)
71 {}
72 
73 EventIDLookup::result_type
74 EventIDLookup::operator()(argument_type entry) const
75 {
76  auto i = index_.find(entry);
77  if (i == cend(index_)) {
79  << "MixHelper could not find entry number " << entry
80  << " in its own lookup table.\n";
81  }
82  return i->second;
83 }
84 
85 namespace {
86  double
87  initCoverageFraction(fhicl::ParameterSet const& pset)
88  {
89  auto result = pset.get<double>("coverageFraction", 1.0);
90  if (result > (1 + std::numeric_limits<double>::epsilon())) {
91  mf::LogWarning("Configuration")
92  << "coverageFraction > 1: treating as a percentage.\n";
93  result /= 100.0;
94  }
95  return result;
96  }
97 
98  std::unique_ptr<CLHEP::RandFlat>
99  initDist(art::MixHelper::Mode readMode)
100  {
101  using namespace art;
102  std::unique_ptr<CLHEP::RandFlat> result;
103  if (readMode > MixHelper::Mode::SEQUENTIAL) {
104  if (ServiceRegistry::isAvailable<RandomNumberGenerator>()) {
105  result = std::make_unique<CLHEP::RandFlat>(
106  ServiceHandle<RandomNumberGenerator> {}->getEngine());
107  } else {
108  throw Exception(errors::Configuration, "MixHelper")
109  << "Random event mixing selected but RandomNumberGenerator service "
110  "not loaded.\n"
111  << "Ensure service is loaded with: \n"
112  << "services.RandomNumberGenerator: {}\n";
113  }
114  }
115  return result;
116  }
117 }
118 
120  ProducerBase& producesProvider)
121  : producesProvider_{producesProvider}
122  , filenames_{pset.get<std::vector<std::string>>("fileNames", {})}
123  , compactMissingProducts_{pset.get<bool>("compactMissingProducts", false)}
124  , fileIter_{filenames_.begin()}
125  , readMode_{initReadMode_(pset.get<std::string>("readMode", "sequential"))}
126  , coverageFraction_{initCoverageFraction(pset)}
127  , canWrapFiles_{pset.get<bool>("wrapFiles", false)}
128  , dist_{initDist(readMode_)}
129 {}
130 
131 void
133 {
134  if (!filenames_.empty()) {
136  << "Provision of a secondary file name provider is incompatible"
137  << " with a\nnon-empty fileNames parameter to the mix filter.\n";
138  }
139  providerFunc_ = func;
140 }
141 
142 bool
143 art::MixHelper::generateEventSequence(size_t const nSecondaries,
144  EntryNumberSequence& enSeq,
145  EventIDSequence& eIDseq)
146 {
147  assert(enSeq.empty());
148  assert(eIDseq.empty());
149  assert(nEventsInFile_ >= 0);
150  bool over_threshold =
152  ((nEventsReadThisFile_ + nSecondaries) >
153  static_cast<size_t>(nEventsInFile_)) :
154  ((nEventsReadThisFile_ + nSecondaries) >
156  if (over_threshold || (!ffVersion_.isValid())) {
157  if (openNextFile_()) {
158  return generateEventSequence(nSecondaries, enSeq, eIDseq);
159  } else {
160  return false;
161  }
162  }
163  switch (readMode_) {
164  case Mode::SEQUENTIAL:
165  enSeq.reserve(nSecondaries);
166  for (size_t i = nEventsReadThisFile_,
167  end = nEventsReadThisFile_ + nSecondaries;
168  i < end;
169  ++i) {
170  enSeq.push_back(i);
171  }
172  break;
174  std::generate_n(std::back_inserter(enSeq), nSecondaries, [this] {
175  return dist_.get()->fireInt(nEventsInFile_);
176  });
177  std::sort(enSeq.begin(), enSeq.end());
178  break;
180  std::unordered_set<EntryNumberSequence::value_type>
181  entries; // Guaranteed unique.
182  while (entries.size() < nSecondaries) {
183  std::generate_n(
184  std::inserter(entries, entries.begin()),
185  nSecondaries - entries.size(),
186  [this] { return dist_.get()->fireInt(nEventsInFile_); });
187  }
188  enSeq.assign(cbegin(entries), cend(entries));
189  std::sort(begin(enSeq), end(enSeq));
190  // Since we need to sort at the end anyway, it's unclear whether
191  // unordered_set is faster than set even though inserts are
192  // approximately linear time. Since the complexity of the sort is
193  // NlogN, we'd need a profile run for it all to come out in the
194  // wash.
195  assert(enSeq.size() == nSecondaries); // Should be true by construction.
196  } break;
198  auto i = cbegin(shuffledSequence_) + nEventsReadThisFile_;
199  enSeq.assign(i, i + nSecondaries);
200  } break;
201  default:
203  << "Unrecognized read mode " << static_cast<int>(readMode_)
204  << ". Contact the art developers.\n";
205  }
206  cet::transform_all(
207  enSeq, std::back_inserter(eIDseq), EventIDLookup{eventIDIndex_});
208  return true;
209 }
210 
211 void
213  EventAuxiliarySequence& auxseq)
214 {
215  auto const eventTree = currentDataTrees_[InEvent];
216  auto auxBranch =
217  eventTree->GetBranch(BranchTypeToAuxiliaryBranchName(InEvent).c_str());
218  auto aux = std::make_unique<EventAuxiliary>();
219  auto pAux = aux.get();
220  auxBranch->SetAddress(&pAux);
221  for (auto const entry : enseq) {
222  auto err = eventTree->LoadTree(entry);
223  if (err == -2) {
224  // FIXME: Throw an error here, taking care to disconnect the
225  // branch from the i/o buffer.
226  // FIXME: -2 means entry number too big.
227  }
228  // Note: Root will overwrite the old event
229  // auxiliary with the new one.
230  input::getEntry(auxBranch, entry);
231  // Note: We are intentionally making a copy here
232  // of the fetched event auxiliary!
233  auxseq.push_back(*pAux);
234  }
235  // Disconnect the branch from the i/o buffer.
236  auxBranch->SetAddress(nullptr);
237 }
238 
239 void
241  EventIDSequence const& eIDseq,
242  Event& e)
243 {
244  // Populate the remapper in case we need to remap any Ptrs.
246  // Dummy remapper in case we need it.
247  static PtrRemapper const nopRemapper;
248  // Create required info only if we're likely to need it:
249  EntryNumberSequence subRunEntries;
250  EntryNumberSequence runEntries;
251  if (haveSubRunMixOps_) {
252  subRunEntries.reserve(eIDseq.size());
253  for (auto const& eID : eIDseq) {
254  auto const it = currentFileIndex_.findPosition(eID.subRunID(), true);
255  if (it != currentFileIndex_.cend()) {
256  subRunEntries.emplace_back(it->entry_);
257  } else {
258  throw Exception(errors::NotFound, "NO_SUBRUN")
259  << "- Unable to find an entry in the SubRun tree corresponding to "
260  "event ID "
261  << eID << " in secondary mixing input file "
262  << currentFile_->GetName() << ".\n";
263  }
264  }
265  }
266  if (haveRunMixOps_) {
267  runEntries.reserve(eIDseq.size());
268  for (auto const& eID : eIDseq) {
269  auto const it = currentFileIndex_.findPosition(eID.runID(), true);
270  if (it != currentFileIndex_.cend()) {
271  runEntries.emplace_back(it->entry_);
272  } else {
273  throw Exception(errors::NotFound, "NO_RUN")
274  << "- Unable to find an entry in the Run tree corresponding to event "
275  "ID "
276  << eID << " in secondary mixing input file "
277  << currentFile_->GetName() << ".\n";
278  }
279  }
280  }
281  // Do the branch-wise read, mix and put.
282  for (auto const& op : mixOps_) {
283  switch (op->branchType()) {
284  case InEvent:
285  op->readFromFile(eventEntries, branchIDLists_.get());
286  op->mixAndPut(e, ptrRemapper_);
287  break;
288  case InSubRun:
289  op->readFromFile(subRunEntries, nullptr);
290  // No Ptrs in subrun products.
291  op->mixAndPut(e, nopRemapper);
292  break;
293  case InRun:
294  op->readFromFile(runEntries, nullptr);
295  // No Ptrs in run products.
296  op->mixAndPut(e, nopRemapper);
297  break;
298  default:
299  throw Exception(errors::LogicError, "Unsupported BranchType")
300  << "- MixHelper::mixAndPut() attempted to handle unsupported branch "
301  "type "
302  << op->branchType() << ".\n";
303  }
304  }
305  nEventsReadThisFile_ += eventEntries.size();
306  totalEventsRead_ += eventEntries.size();
307 }
308 
309 void
310 art::MixHelper::setEventsToSkipFunction(std::function<size_t()> eventsToSkip)
311 {
312  eventsToSkip_ = eventsToSkip;
313 }
314 
315 auto
316 art::MixHelper::initReadMode_(std::string const& mode) const -> Mode
317 {
318  // These regexes must correspond by index to the valid Mode enumerator
319  // values.
320  static std::regex const robjs[4]{
321  std::regex("^seq", std::regex_constants::icase),
322  std::regex("^random(replace)?$", std::regex_constants::icase),
323  std::regex("^randomlimreplace$", std::regex_constants::icase),
324  std::regex("^randomnoreplace$", std::regex_constants::icase)};
325  int i{0};
326  for (auto const& r : robjs) {
327  if (std::regex_search(mode, r)) {
328  return Mode(i);
329  } else {
330  ++i;
331  }
332  }
334  << "Unrecognized value of readMode parameter: \"" << mode
335  << "\". Valid values are:\n"
336  << " sequential,\n"
337  << " randomReplace (random is accepted for reasons of legacy),\n"
338  << " randomLimReplace,\n"
339  << " randomNoReplace.\n";
340 }
341 
342 void
344 {
345  // Open file.
346  try {
347  currentFile_.reset(TFile::Open(filename.c_str()));
348  }
349  catch (std::exception const& e) {
350  throw Exception(errors::FileOpenError, e.what())
351  << "Unable to open specified secondary event stream file " << filename
352  << ".\n";
353  }
354  if (!currentFile_ || currentFile_->IsZombie()) {
356  << "Unable to open specified secondary event stream file " << filename
357  << ".\n";
358  }
359  // Obtain meta data tree.
360  currentMetaDataTree_.reset(static_cast<TTree*>(
361  currentFile_->Get(rootNames::metaDataTreeName().c_str())));
362  if (currentMetaDataTree_.get() == nullptr) {
364  << "Unable to read meta data tree from secondary event stream file "
365  << filename << ".\n";
366  }
367  currentDataTrees_ = initDataTrees(currentFile_);
368  nEventsInFile_ = currentDataTrees_[InEvent]->GetEntries();
369 
370  ffVersion_ =
371  detail::readMetadata<FileFormatVersion>(currentMetaDataTree_.get());
372 
373  // Read file index
374  FileIndex* fileIndexPtr = &currentFileIndex_;
376  currentFile_.get(), currentMetaDataTree_.get(), fileIndexPtr);
377 
378  // To support files that contain BranchIDLists
379  BranchIDLists branchIDLists{};
380  if (detail::readMetadata(currentMetaDataTree_.get(), branchIDLists)) {
381  branchIDLists_ = std::make_unique<BranchIDLists>(std::move(branchIDLists));
382  configureProductIDStreamer(branchIDLists_.get());
383  }
384 
385  // Check file format era.
386  std::string const expected_era = getFileFormatEra();
387  if (ffVersion_.era_ != expected_era) {
389  << "Can only read files written during the \"" << expected_era
390  << "\" era: "
391  << "Era of "
392  << "\"" << filename << "\" was "
393  << (ffVersion_.era_.empty() ? "not set" :
394  ("set to \"" + ffVersion_.era_ + "\" "))
395  << ".\n";
396  }
397  auto dbCount = 0;
398  for (auto const tree : currentDataTrees_) {
399  if (tree.get()) {
400  dataBranches_[dbCount].reset(tree.get());
401  }
402  ++dbCount;
403  }
404 
405  eventIDIndex_ = buildEventIDIndex(currentFileIndex_);
407  buildProductIDTransMap_(transMap);
410  // Prepare shuffled event sequence.
411  shuffledSequence_.resize(static_cast<size_t>(nEventsInFile_));
412  std::iota(shuffledSequence_.begin(), shuffledSequence_.end(), 0);
413  std::random_shuffle(shuffledSequence_.begin(),
414  shuffledSequence_.end(),
415  [this](EntryNumberSequence::difference_type const& n) {
416  return dist_.get()->fireInt(n);
417  });
418  }
419 }
420 
421 bool
423 {
424  std::string filename;
425  if (providerFunc_) {
426  filename = providerFunc_();
427  if (filename.empty()) {
428  return false;
429  }
430  } else if (filenames_.empty()) {
431  return false;
432  } else {
433  if (ffVersion_.isValid()) { // Already seen one file.
434  ++fileIter_;
435  }
436  if (fileIter_ == filenames_.end()) {
437  if (canWrapFiles_) {
438  mf::LogWarning("MixingInputWrap")
439  << "Wrapping around to initial input file for mixing after "
440  << totalEventsRead_ << " secondary events read.";
441  fileIter_ = filenames_.begin();
442  } else {
443  return false;
444  }
445  }
446  filename = *fileIter_;
447  }
449  eventsToSkip_() :
450  0; // Reset for this file.
451  openAndReadMetaData_(filename);
452  return true;
453 }
454 
455 void
458 {
459  for (auto& mixOp : mixOps_) {
460  auto const bt = mixOp->branchType();
461  mixOp->initializeBranchInfo(dataBranches_[bt]);
462 #if ART_DEBUG_PTRREMAPPER
463  std::cerr << "BranchIDTransMap: " << std::hex << std::setfill('0')
464  << std::setw(8) << mixOp->incomingProductID() << " -> "
465  << std::setw(8) << mixOp->outgoingProductID() << std::dec
466  << ".\n";
467 #endif
468  if (bt == InEvent) {
469  transMap[mixOp->incomingProductID()] = mixOp->outgoingProductID();
470  }
471  }
472 }
void registerSecondaryFileNameProvider(ProviderFunc_ func)
Definition: MixHelper.cc:132
std::vector< std::string >::const_iterator fileIter_
Definition: MixHelper.h:370
std::vector< EventAuxiliary > EventAuxiliarySequence
Definition: MixTypes.h:24
std::function< size_t()> eventsToSkip_
Definition: MixHelper.h:382
std::vector< EventID > EventIDSequence
Definition: MixTypes.h:22
cet::value_ptr< TFile > currentFile_
Definition: MixHelper.h:388
ProviderFunc_ providerFunc_
Definition: MixHelper.h:367
Mode initReadMode_(std::string const &mode) const
Definition: MixHelper.cc:316
void mixAndPut(EntryNumberSequence const &enSeq, EventIDSequence const &eIDseq, Event &e)
Definition: MixHelper.cc:240
void setEventsToSkipFunction(std::function< size_t()> eventsToSkip)
Definition: MixHelper.cc:310
void readFileIndex(TFile *file, TTree *metaDataTree, FileIndex *&findexPtr)
Definition: readFileIndex.h:20
MixHelper(fhicl::ParameterSet const &pset, ProducerBase &producesProvider)
Definition: MixHelper.cc:119
std::array< cet::exempt_ptr< TTree >, art::BranchType::NumBranchTypes > currentDataTrees_
Definition: MixHelper.h:391
std::string const & dataTreeName(BranchType bt)
Definition: rootNames.cc:74
std::map< ProductID, ProductID > ProductIDTransMap
FileFormatVersion ffVersion_
Definition: MixHelper.h:377
MixOpList mixOps_
Definition: MixHelper.h:368
bool haveSubRunMixOps_
Definition: MixHelper.h:384
EntryNumberSequence shuffledSequence_
Definition: MixHelper.h:383
std::unique_ptr< art::BranchIDLists > branchIDLists_
Definition: MixHelper.h:378
std::string const & BranchTypeToAuxiliaryBranchName(BranchType const bt)
Definition: BranchType.cc:83
std::vector< std::string > const filenames_
Definition: MixHelper.h:365
std::unique_ptr< CLHEP::RandFlat > dist_
Definition: MixHelper.h:381
double const coverageFraction_
Definition: MixHelper.h:372
Mode const readMode_
Definition: MixHelper.h:371
std::array< RootBranchInfoList, art::BranchType::NumBranchTypes > dataBranches_
Definition: MixHelper.h:393
Long64_t nEventsReadThisFile_
Definition: MixHelper.h:373
bool generateEventSequence(size_t nSecondaries, EntryNumberSequence &enSeq, EventIDSequence &eIDseq)
Definition: MixHelper.cc:143
std::string const & metaDataTreeName()
Definition: rootNames.cc:41
bool const canWrapFiles_
Definition: MixHelper.h:376
FileIndex currentFileIndex_
Definition: MixHelper.h:392
cet::exempt_ptr< TTree > currentMetaDataTree_
Definition: MixHelper.h:389
bool openNextFile_()
Definition: MixHelper.cc:422
bool compactMissingProducts_
Definition: MixHelper.h:366
void prepareTranslationTables(ProductIDTransMap &transMap)
T get(std::string const &key) const
Definition: ParameterSet.h:231
std::vector< evd::details::RawDigitInfo_t >::const_iterator begin(RawDigitCacheDataClass const &cache)
void generateEventAuxiliarySequence(EntryNumberSequence const &, EventAuxiliarySequence &)
Definition: MixHelper.cc:212
const_iterator cend() const
Definition: FileIndex.h:119
const_iterator findPosition(EventID const &eID) const
Definition: FileIndex.cc:123
std::vector< FileIndex::EntryNumber_t > EntryNumberSequence
Definition: MixTypes.h:23
void buildProductIDTransMap_(ProdToProdMapBuilder::ProductIDTransMap &transMap)
Definition: MixHelper.cc:456
std::string const & getFileFormatEra()
PtrRemapper ptrRemapper_
Definition: MixHelper.h:369
void openAndReadMetaData_(std::string fileName)
Definition: MixHelper.cc:343
cet::coded_exception< errors::ErrorCodes, ExceptionDetail::translate > Exception
Definition: Exception.h:66
std::map< FileIndex::EntryNumber_t, EventID > EventIDIndex
Definition: MixTypes.h:19
bool haveRunMixOps_
Definition: MixHelper.h:385
std::function< std::string()> ProviderFunc_
Definition: MixHelper.h:246
EventIDIndex eventIDIndex_
Definition: MixHelper.h:395
Long64_t nEventsInFile_
Definition: MixHelper.h:374
MaybeLogger_< ELseverityLevel::ELsev_warning, false > LogWarning
HLT enums.
Int_t getEntry(TBranch *branch, EntryNumber entryNumber)
Definition: getEntry.cc:12
Char_t n[5]
std::vector< evd::details::RawDigitInfo_t >::const_iterator end(RawDigitCacheDataClass const &cache)
Float_t e
Definition: plot.C:34
ProdToProdMapBuilder ptpBuilder_
Definition: MixHelper.h:380
void populateRemapper(PtrRemapper &mapper, Event &e) const
T readMetadata(TTree *md, bool const requireDict=true)
Definition: readMetadata.h:13
cet::coded_exception< error, detail::translate > exception
Definition: exception.h:33
Long64_t totalEventsRead_
Definition: MixHelper.h:375