LArSoft  v09_90_00
Liquid Argon Software toolkit - https://larsoft.org/
PointIdTrainingData_module.cc
Go to the documentation of this file.
1 // Class: PointIdTrainingData
3 // Author: P.Plonski, R.Sulej (Robert.Sulej@cern.ch), D.Stefan, May 2016
4 //
5 // Training data for PointIdAlg
6 //
7 // We use this to dump deconv. ADC for preparation of various classifiers.
8 //
10 
17 
18 // art extensions
20 
21 // Framework includes
26 #include "art_root_io/TFileService.h"
29 #include "fhiclcpp/types/Atom.h"
30 #include "fhiclcpp/types/Comment.h"
31 #include "fhiclcpp/types/Name.h"
33 #include "fhiclcpp/types/Table.h"
35 
36 #include "CLHEP/Random/RandFlat.h"
37 
38 #include "TH2F.h" // ADC and deposit maps
39 #include "TH2I.h" // PDG+vertex info map
40 
41 // C++ Includes
42 #include <cmath>
43 #include <fstream>
44 #include <iostream>
45 #include <sstream>
46 #include <string>
47 #include <vector>
48 
49 namespace {
50  template <typename Hist>
51  void writeAndDelete(Hist*& hist)
52  {
53  if (!hist) return;
54  hist->Write();
55  delete hist;
56  hist = nullptr;
57  } // writeAndDelete()
58 } // local namespace
59 
60 namespace nnet {
61 
63  public:
64  struct Config {
65  using Name = fhicl::Name;
67 
69 
71  Comment("Text files with all needed data dumped.")};
72 
74  Comment("Numpy files with patches.")};
75 
77  Name("DumpToRoot"),
78  Comment("Dump to ROOT histogram file (replaces the text files)")};
79 
81  Comment("Dump to Numpy file (replaces the text files)")};
82 
84  Name("SelectedTPC"),
85  Comment("use selected views only, or all views if empty list")};
86 
88  Name("SelectedView"),
89  Comment("use selected tpc's only, or all tpc's if empty list")};
90 
92  Comment("Crop the projection to the event region plus margin")};
93 
94  fhicl::Atom<int> Patch_size_w{Name("Patch_size_w"), Comment("Patch size in wire dimension")};
95 
96  fhicl::Atom<int> Patch_size_d{Name("Patch_size_d"), Comment("Patch size in drift dimension")};
97 
98  fhicl::Atom<double> Em{Name("Em"), Comment("Fraction of Em patches to keep")};
99 
100  fhicl::Atom<double> Trk{Name("Trk"), Comment("Fraction of Trk patches to keep")};
101 
102  fhicl::Atom<double> Michel{Name("Michel"), Comment("Fraction of Michel patches to keep")};
103 
104  fhicl::Atom<double> None{Name("None"), Comment("Fraction of None patches to keep")};
105 
107  Comment("Fraction of stopping Trk patches to keep")};
108 
110  Comment("Fraction of clean track patches to keep")};
111  };
113 
114  explicit PointIdTrainingData(Parameters const& config);
115 
116  void beginJob() override;
117  void endJob() override;
118 
119  private:
120  void analyze(const art::Event& event) override;
121 
123 
124  std::string fOutTextFilePath;
125  std::string fOutNumpyFileName;
128 
129  std::vector<int> fSelectedTPC;
130  std::vector<int> fSelectedPlane;
131 
132  int fEvent;
133  int fRun;
134  int fSubRun;
135 
136  bool fCrop;
137 
140 
141  double fEm, fTrk, fMichel, fNone;
143 
147 
149 
151 
152  CLHEP::HepRandomEngine& fEngine;
153 
154  int WeightedFit(int n,
155  std::vector<double> const& x,
156  std::vector<double> const& y,
157  std::vector<double> const& w,
158  double* params) const;
159  };
160 
161  //-----------------------------------------------------------------------
163  : art::EDAnalyzer(config)
164  , fTrainingDataAlg(config().TrainingDataAlg())
165  , fOutTextFilePath(config().OutTextFilePath())
166  , fOutNumpyFileName(config().OutNumpyFileName())
167  , fDumpToRoot(config().DumpToRoot())
168  , fDumpToNumpy(config().DumpToNumpy())
169  , fSelectedTPC(config().SelectedTPC())
170  , fSelectedPlane(config().SelectedView())
171  , fCrop(config().Crop())
172  , fPatch_size_w(config().Patch_size_w())
173  , fPatch_size_d(config().Patch_size_d())
174  , fEm(config().Em())
175  , fTrk(config().Trk())
176  , fMichel(config().Michel())
177  , fNone(config().None())
178  , fStopTrk(config().StopTrk())
179  , fCleanTrk(config().CleanTrk())
180  , fEngine(art::ServiceHandle<rndm::NuRandomService>()->registerAndSeedEngine(createEngine(0)))
181  {
183 
184  if (fSelectedTPC.empty()) {
185  for (size_t tpc = 0; tpc < fGeometry->NTPC(); ++tpc)
186  fSelectedTPC.push_back(tpc);
187  }
188 
189  if (fSelectedPlane.empty()) {
190  for (size_t p = 0; p < fGeometry->MaxPlanes(); ++p)
191  fSelectedPlane.push_back(p);
192  }
193  }
194 
195  //-----------------------------------------------------------------------
197  {
198 
211  for (int i = 0; i < fPatch_size_w * fPatch_size_d; ++i) {
212  c2numpy_addcolumn(&npywriter, Form("x%d", i), C2NUMPY_FLOAT32);
213  }
214  nEm = 0;
215  nTrk = 0;
216  nMichel = 0;
217  nNone = 0;
218  nEm_sel = 0;
219  nTrk_sel = 0;
220  nMichel_sel = 0;
221  nNone_sel = 0;
222  nStopTrk_sel = 0;
223  nCleanTrk_sel = 0;
224  }
225 
226  //-----------------------------------------------------------------------
228  {
229  std::cout << "nEm = " << nEm << std::endl;
230  std::cout << "nTrk = " << nTrk << std::endl;
231  std::cout << "nMichel = " << nMichel << std::endl;
232  std::cout << "nNone = " << nNone << std::endl;
233  std::cout << std::endl;
234  std::cout << "nEm_sel = " << nEm_sel << std::endl;
235  std::cout << "nTrk_sel = " << nTrk_sel << std::endl;
236  std::cout << "nMichel_sel = " << nMichel_sel << std::endl;
237  std::cout << "nNone_sel = " << nNone_sel << std::endl;
238  std::cout << "nStopTrk_sel = " << nStopTrk_sel << std::endl;
239  std::cout << "nCleanTrk_sel = " << nCleanTrk_sel << std::endl;
241  }
242 
243  //-----------------------------------------------------------------------
245  {
246  fEvent = event.id().event();
247  fRun = event.run();
248  fSubRun = event.subRun();
249 
250  bool saveSim = fTrainingDataAlg.saveSimInfo() && !event.isRealData();
251 
252  std::ostringstream os;
253  os << "event_" << fEvent << "_run_" << fRun << "_subrun_" << fSubRun;
254 
255  std::cout << "analyze " << os.str() << std::endl;
256 
257  auto const clockData =
259  auto const detProp =
261 
262  CLHEP::RandFlat flat(fEngine);
263 
264  for (size_t i = 0; i < fSelectedTPC.size(); ++i)
265  for (size_t v = 0; v < fSelectedPlane.size(); ++v) {
267  event, clockData, detProp, fSelectedPlane[v], fSelectedTPC[i], 0);
268 
269  unsigned int w0, w1, d0, d1;
270  if (fCrop && saveSim) {
271  if (fTrainingDataAlg.findCrop(0.004F, w0, w1, d0, d1)) {
272  std::cout << " crop: " << w0 << " " << w1 << " " << d0 << " " << d1 << std::endl;
273  }
274  else {
275  std::cout << " skip empty tpc:" << fSelectedTPC[i] << " / view:" << fSelectedPlane[v]
276  << std::endl;
277  continue;
278  }
279  }
280  else {
281  w0 = 0;
282  w1 = fTrainingDataAlg.NWires();
283  d0 = 0;
285  }
286 
287  if (fDumpToRoot) {
288  std::ostringstream ss1;
289  ss1 << "raw_" << os.str() << "_tpc_" << fSelectedTPC[i] << "_view_"
290  << fSelectedPlane[v]; // TH2's name
291 
293  TH2F* rawHist =
294  tfs->make<TH2F>((ss1.str() + "_raw").c_str(), "ADC", w1 - w0, w0, w1, d1 - d0, d0, d1);
295  TH2F* depHist = 0;
296  TH2I* pdgHist = 0;
297  if (saveSim) {
298  depHist = tfs->make<TH2F>(
299  (ss1.str() + "_deposit").c_str(), "Deposit", w1 - w0, w0, w1, d1 - d0, d0, d1);
300  pdgHist = tfs->make<TH2I>(
301  (ss1.str() + "_pdg").c_str(), "PDG", w1 - w0, w0, w1, d1 - d0, d0, d1);
302  }
303 
304  for (size_t w = w0; w < w1; ++w) {
305  auto const& raw = fTrainingDataAlg.wireData(w);
306  for (size_t d = d0; d < d1; ++d) {
307  rawHist->Fill(w, d, raw[d]);
308  }
309 
310  if (saveSim) {
311  auto const& edep = fTrainingDataAlg.wireEdep(w);
312  for (size_t d = d0; d < d1; ++d) {
313  depHist->Fill(w, d, edep[d]);
314  }
315 
316  auto const& pdg = fTrainingDataAlg.wirePdg(w);
317  for (size_t d = d0; d < d1; ++d) {
318  pdgHist->Fill(w, d, pdg[d]);
319  }
320  }
321  }
322 
323  writeAndDelete(rawHist);
324  writeAndDelete(depHist);
325  writeAndDelete(pdgHist);
326  }
327  else if (fDumpToNumpy) {
328  for (size_t w = w0; w < w1; ++w) {
329  int w_start = w - fPatch_size_w / 2;
330  int w_stop = w_start + fPatch_size_w;
331  if (w_start < int(w0) || w_start > int(w1)) continue;
332  if (w_stop < int(w0) || w_stop > int(w1)) continue;
333  auto const& pdg = fTrainingDataAlg.wirePdg(w);
334  auto const& deposit = fTrainingDataAlg.wireEdep(w);
335  auto const& raw = fTrainingDataAlg.wireData(w);
336  for (size_t d = d0; d < d1; ++d) {
337  int d_start = d - fPatch_size_d / 2;
338  int d_stop = d_start + fPatch_size_d;
339  if (d_start < int(d0) || d_start > int(d1)) continue;
340  if (d_stop < int(d0) || d_stop > int(d1)) continue;
341  int y0 = 0, y1 = 0, y2 = 0, y3 = 0;
342  if (deposit[d] < 2e-5 || raw[d] < 0.05) { //empty pixel
343  y3 = 1;
344  ++nNone;
345  if (flat.fire() > fNone) continue;
346  ++nNone_sel;
347  }
348  else if ((pdg[d] & 0x0FFF) == 11) { //shower
349  y1 = 1;
350  ++nEm;
351  if ((pdg[d] & 0xF000) == 0x2000) { //Michel
352  y2 = 1;
353  ++nMichel;
354  if (flat.fire() > fMichel) continue;
355  ++nMichel_sel;
356  }
357  else {
358  if (flat.fire() > fEm) continue;
359  ++nEm_sel;
360  }
361  }
362  else { //track
363  y0 = 1;
364  ++nTrk;
365  //Check if the track appears to be stopping
366  int nPxlw0 = 0, nPxlw1 = 0, nPxld0 = 0, nPxld1 = 0;
367  //Try to fit a track
368  std::vector<double> wfit, dfit, chgfit;
369  double total_trkchg = 0;
370  for (int ww = w_start; ww < w_stop; ++ww) {
371  auto const& pdg1 = fTrainingDataAlg.wirePdg(ww);
372  auto const& deposit1 = fTrainingDataAlg.wireEdep(ww);
373  auto const& raw1 = fTrainingDataAlg.wireData(ww);
374  for (int dd = d_start; dd < d_stop; ++dd) {
375  if (deposit1[dd] < 2e-5 || raw1[dd] < 0.05) continue; //empty pixel
376  if ((pdg1[dd] & 0x0FFF) != 11) { //track pixel
377  wfit.push_back(ww);
378  dfit.push_back(dd);
379  chgfit.push_back(raw1[dd]);
380  total_trkchg += raw1[dd];
381  if (ww - w_start < 3) ++nPxlw0;
382  if (w_stop - ww - 1 < 3) ++nPxlw1;
383  if (dd - d_start < 3) ++nPxld0;
384  if (d_stop - dd - 1 < 3) ++nPxld1;
385  }
386  }
387  }
388  //Fit track pixels
389  double fit_trkchg = 0;
390  double parm[2] = {};
391  if (!wfit.empty()) {
392  if (!WeightedFit(wfit.size(), wfit, dfit, chgfit, &parm[0])) {
393  for (size_t j = 0; j < wfit.size(); ++j) {
394  if (std::abs((dfit[j] - (parm[0] + wfit[j] * parm[1])) * cos(atan(parm[1]))) <
395  3) {
396  fit_trkchg += chgfit[j];
397  }
398  }
399  }
400  else if (!WeightedFit(dfit.size(), dfit, wfit, chgfit, &parm[0])) {
401  for (size_t j = 0; j < dfit.size(); ++j) {
402  if (std::abs((wfit[j] - (parm[0] + dfit[j] * parm[1])) * cos(atan(parm[1]))) <
403  3) {
404  fit_trkchg += chgfit[j];
405  }
406  }
407  }
408  }
409  if (((nPxlw0) && (!nPxlw1) && (!nPxld0) && (!nPxld1)) ||
410  ((!nPxlw0) && (nPxlw1) && (!nPxld0) && (!nPxld1)) ||
411  ((!nPxlw0) && (!nPxlw1) && (nPxld0) && (!nPxld1)) ||
412  ((!nPxlw0) && (!nPxlw1) && (!nPxld0) && (nPxld1))) { //stopping track
413  if (flat.fire() > fStopTrk) continue;
414  ++nStopTrk_sel;
415  }
416  else if (total_trkchg && fit_trkchg / total_trkchg > 0.9) { //clean track
417  if (flat.fire() > fCleanTrk) continue;
418  ++nCleanTrk_sel;
419  }
420  else {
421  if (flat.fire() > fTrk) continue;
422  ++nTrk_sel;
423  }
424  }
425  c2numpy_uint32(&npywriter, event.id().run());
426  c2numpy_uint32(&npywriter, event.id().subRun());
427  c2numpy_uint32(&npywriter, event.id().event());
432  c2numpy_uint8(&npywriter, y0);
436  for (int ww = w_start; ww < w_stop; ++ww) {
437  auto const& raw1 = fTrainingDataAlg.wireData(ww);
438  for (int dd = d_start; dd < d_stop; ++dd) {
439  c2numpy_float32(&npywriter, raw1[dd]);
440  }
441  }
442  }
443  }
444  }
445  else {
446  std::ostringstream ss1;
447  ss1 << fOutTextFilePath << "/raw_" << os.str() << "_tpc_" << fSelectedTPC[i] << "_view_"
448  << fSelectedPlane[v];
449 
450  std::ofstream fout_raw, fout_deposit, fout_pdg;
451 
452  fout_raw.open(ss1.str() + ".raw");
453  if (saveSim) {
454  fout_deposit.open(ss1.str() + ".deposit");
455  fout_pdg.open(ss1.str() + ".pdg");
456  }
457 
458  for (size_t w = w0; w < w1; ++w) {
459  auto const& raw = fTrainingDataAlg.wireData(w);
460  for (size_t d = d0; d < d1; ++d) {
461  fout_raw << raw[d] << " ";
462  }
463  fout_raw << std::endl;
464 
465  if (saveSim) {
466  auto const& edep = fTrainingDataAlg.wireEdep(w);
467  for (size_t d = d0; d < d1; ++d) {
468  fout_deposit << edep[d] << " ";
469  }
470  fout_deposit << std::endl;
471 
472  auto const& pdg = fTrainingDataAlg.wirePdg(w);
473  for (size_t d = d0; d < d1; ++d) {
474  fout_pdg << pdg[d] << " ";
475  }
476  fout_pdg << std::endl;
477  }
478  }
479 
480  fout_raw.close();
481  if (saveSim) {
482  fout_deposit.close();
483  fout_pdg.close();
484  }
485  }
486  }
487 
488  } // PointIdTrainingData::analyze()
489 
490  //-----------------------------------------------------------------------
492  std::vector<double> const& x,
493  std::vector<double> const& y,
494  std::vector<double> const& w,
495  double* params) const
496  {
497  Double_t sumx = 0.;
498  Double_t sumx2 = 0.;
499  Double_t sumy = 0.;
500  Double_t sumy2 = 0.;
501  Double_t sumxy = 0.;
502  Double_t sumw = 0.;
503  Double_t eparams[2] = {};
504 
505  for (Int_t i = 0; i < n; i++) {
506  sumx += x[i] * w[i];
507  sumx2 += x[i] * x[i] * w[i];
508  sumy += y[i] * w[i];
509  sumy2 += y[i] * y[i] * w[i];
510  sumxy += x[i] * y[i] * w[i];
511  sumw += w[i];
512  }
513 
514  if (sumx2 * sumw - sumx * sumx == 0.) return 1;
515  if (sumx2 - sumx * sumx / sumw == 0.) return 1;
516 
517  params[0] = (sumy * sumx2 - sumx * sumxy) / (sumx2 * sumw - sumx * sumx);
518  params[1] = (sumxy - sumx * sumy / sumw) / (sumx2 - sumx * sumx / sumw);
519 
520  eparams[0] = sumx2 * (sumx2 * sumw - sumx * sumx);
521  eparams[1] = (sumx2 - sumx * sumx / sumw);
522 
523  if (eparams[0] < 0. || eparams[1] < 0.) return 1;
524 
525  eparams[0] = sqrt(eparams[0]) / (sumx2 * sumw - sumx * sumx);
526  eparams[1] = sqrt(eparams[1]) / (sumx2 - sumx * sumx / sumw);
527 
528  return 0;
529  }
530 
532 
533 }
Float_t x
Definition: compare.C:6
int c2numpy_init(c2numpy_writer *writer, const std::string outputFilePrefix, int32_t numRowsPerFile)
Definition: c2numpy.h:140
base_engine_t & createEngine(seed_t seed)
double fEm
patch size in drift dimension
unsigned int NTPC(CryostatID const &cryoid=cryostat_zero) const
Returns the total number of TPCs in the specified cryostat.
Definition: GeometryCore.h:686
std::vector< float > const & wireEdep(size_t widx) const
Definition: PointIdAlg.h:287
Float_t y1[n_points_granero]
Definition: compare.C:5
int WeightedFit(int n, std::vector< double > const &x, std::vector< double > const &y, std::vector< double > const &w, double *params) const
bool saveSimInfo() const
Definition: PointIdAlg.h:262
Float_t y
Definition: compare.C:6
bool setEventData(const art::Event &event, detinfo::DetectorClocksData const &clockData, detinfo::DetectorPropertiesData const &detProp, unsigned int plane, unsigned int tpc, unsigned int cryo)
Definition: PointIdAlg.cxx:867
int fPatch_size_d
patch size in wire dimension
constexpr auto abs(T v)
Returns the absolute value of the argument.
int c2numpy_uint8(c2numpy_writer *writer, uint8_t data)
Definition: c2numpy.h:316
Raw data description.
Definition: RawTypes.h:6
EDAnalyzer(fhicl::ParameterSet const &pset)
Definition: EDAnalyzer.cc:6
RunNumber_t run() const
Definition: EventID.h:98
Float_t y2[n_points_geant4]
Definition: compare.C:26
int fRun
number of the event being processed
int c2numpy_close(c2numpy_writer *writer)
Definition: c2numpy.h:425
unsigned int NWires() const
geo::GeometryCore const * fGeometry
Access the description of detector geometry.
unsigned int MaxPlanes() const
Returns the largest number of planes among all TPCs in this detector.
int c2numpy_addcolumn(c2numpy_writer *writer, const std::string name, c2numpy_type type)
Definition: c2numpy.h:157
#define DEFINE_ART_MODULE(klass)
Definition: ModuleMacros.h:65
int fPatch_size_w
crop data to event (set to false when dumping noise!)
Definition: EmTrack.h:40
Float_t d
Definition: plot.C:235
bool fCrop
number of the sub-run being processed
bool findCrop(float max_e_cut, unsigned int &w0, unsigned int &w1, unsigned int &d0, unsigned int &d1) const
Description of geometry of one entire detector.
Definition: GeometryCore.h:119
Double_t edep
Definition: macro.C:13
int c2numpy_uint32(c2numpy_writer *writer, uint32_t data)
Definition: c2numpy.h:334
An art service to assist in the distribution of guaranteed unique seeds to all engines within an art ...
int c2numpy_float32(c2numpy_writer *writer, float data)
Definition: c2numpy.h:369
int fSubRun
number of the run being processed
void analyze(const art::Event &event) override
TH2F * hist
Definition: plot.C:134
int c2numpy_uint16(c2numpy_writer *writer, uint16_t data)
Definition: c2numpy.h:325
PointIdTrainingData(Parameters const &config)
CLHEP::HepRandomEngine & fEngine
art-managed random-number engine
unsigned int NScaledDrifts() const
Definition: MVAAlg.h:12
EventNumber_t event() const
Definition: EventID.h:116
Char_t n[5]
std::vector< int > const & wirePdg(size_t widx) const
Definition: PointIdAlg.h:288
Float_t y3[n_points_geant4]
Float_t e
Definition: plot.C:35
Float_t w
Definition: plot.C:20
SubRunNumber_t subRun() const
Definition: EventID.h:110
EventID id() const
Definition: Event.cc:23
art framework interface to geometry description
Event finding and building.
std::vector< float > const & wireData(size_t widx) const