LArSoft  v09_90_00
Liquid Argon Software toolkit - https://larsoft.org/
ClusterParamsBuilder_tool.cc
Go to the documentation of this file.
1 
8 // Framework Includes
10 #include "fhiclcpp/ParameterSet.h"
11 
12 // LArSoft includes
17 
18 // std includes
19 #include <iostream>
20 #include <memory>
21 #include <unordered_set>
22 
23 namespace lar_cluster3d {
24 
29  public:
35  explicit ClusterParamsBuilder(fhicl::ParameterSet const& pset);
36 
40  virtual ~ClusterParamsBuilder();
41 
42  void configure(const fhicl::ParameterSet&) override;
43 
54  void BuildClusterInfo(reco::ClusterParametersList& clusterParametersList) const override;
55 
67  double /* NminUniqueFrac */,
68  double /* maxLostFrac */) const override;
69 
70  private:
79 
81 
85 
92 
93  PrincipalComponentsAlg m_pcaAlg; // For running Principal Components Analysis
94  };
95 
96  //------------------------------------------------------------------------------------------------------------------------------------------
97  // implementation follows
98 
100  : m_pcaAlg(pset.get<fhicl::ParameterSet>("PrincipalComponentsAlg"))
101  {
102  this->configure(pset);
103  }
104 
105  //------------------------------------------------------------------------------------------------------------------------------------------
106 
108 
109  //------------------------------------------------------------------------------------------------------------------------------------------
110 
112  {
113  m_clusterMinHits = pset.get<size_t>("ClusterMinHits", 3);
114  m_clusterMinUniqueFraction = pset.get<double>("ClusterMinUniqueFraction", 0.5);
115  m_clusterMaxLostFraction = pset.get<double>("ClusterMaxLostFraction", 0.5);
116 
117  return;
118  }
119 
121  reco::ClusterParametersList& clusterParametersList) const
122  {
132  // This is a remote possibility but why not check?
133  if (!clusterParametersList.empty()) {
134  // We want to order our clusters on by largest (most number hits) to smallest. So, we'll loop through the clusters,
135  // weeding out the unwanted ones and keep track of things in a set of "good" clusters which we'll order
136  // by cluster size.
137  clusterParametersList.sort();
138 
139  // The smallest clusters are now at the end, drop those off the back that are less than the mininum necessary
140  while (!clusterParametersList.empty() &&
141  clusterParametersList.back().getHitPairListPtr().size() < m_clusterMinHits)
142  clusterParametersList.pop_back();
143 
144  // The next step is to build out a mapping of all 2D hits to clusters
145  // Keep track of where the hits get distributed...
146  reco::Hit2DToClusterMap hit2DToClusterMap;
147 
148  reco::ClusterParametersList::iterator clusterItr = clusterParametersList.begin();
149 
150  // for(auto& clusterParams : clusterParametersList)
151  // {
152  // for(const auto& hit3D : clusterParams.getHitPairListPtr())
153  // {
154  // for(const auto& hit2D : hit3D->getHits())
155  // {
156  // if (!hit2D) continue;
157  //
158  // hit2DToClusterMap[hit2D][&clusterParams].insert(hit3D);
159  // }
160  // }
161  // }
162 
163  // Ok, spin through again to remove ambiguous hits
164  // for(auto& clusterParams : clusterParametersList) PruneAmbiguousHits(clusterParams,hit2DToClusterMap);
165 
166  // What remains is an order set of clusters, largest first
167  // Now go through and obtain cluster parameters
168  // clusterItr = clusterParametersList.begin();
169 
170  // while(clusterItr != clusterParametersList.end())
171  // {
172  // // Dereference for ease...
173  // reco::ClusterParameters& clusterParams = *clusterItr;
174  //
175  // // Do the actual work of filling the parameters
176  // FillClusterParams(clusterParams, hit2DToClusterMap, m_clusterMinUniqueFraction, m_clusterMaxLostFraction);
177  //
178  // // If this cluster is rejected then the parameters will be empty
179  // if (clusterParams.getClusterParams().empty() || !clusterParams.getFullPCA().getSvdOK())
180  // {
181  // clusterItr = clusterParametersList.erase(clusterItr);
182  // }
183  // else clusterItr++;
184  // }
185 
186  while (clusterItr != clusterParametersList.end()) {
187  // Dereference for ease...
188  reco::ClusterParameters& clusterParams = *clusterItr;
189 
190  if (keepThisCluster(clusterParams, hit2DToClusterMap)) {
191  storeThisCluster(clusterParams, hit2DToClusterMap);
192  clusterItr++;
193  }
194  else
195  clusterItr = clusterParametersList.erase(clusterItr);
196  }
197  }
198 
199  return;
200  }
201 
203  const reco::Hit2DToClusterMap& hit2DToClusterMap) const
204  {
205  // Try to keep simple by looking at the 2D hits associated to the cluster and checking to see how many, by plane, are already
206  // in use. Reject clusters where too many hits are shared.
207 
208  bool keepThisCluster = false;
209 
210  // Define some handy data structures for counting the number of times hits get used and shared
211  using HitCountMap = std::unordered_map<const reco::ClusterHit2D*, int>;
212  using PlaneHitCountMapVec = std::vector<HitCountMap>;
213 
214  PlaneHitCountMapVec totalPlaneHitCountMapVec(3); // counts total number of hits
215  PlaneHitCountMapVec sharedPlaneHitCountMapVec(
216  3); // this is the number shared with a bigger cluster
217  PlaneHitCountMapVec uniquePlaneHitCountMapVec(
218  3); // this is the number unique to this cluster (so far)
219 
220  // Go through the hits and check usage...
221  for (const auto& hit3D : clusterParams.getHitPairListPtr()) {
222  for (const auto& hit2D : hit3D->getHits()) {
223  if (!hit2D) continue;
224 
225  size_t hitPlane = hit2D->WireID().Plane;
226 
227  totalPlaneHitCountMapVec[hitPlane][hit2D]++;
228 
229  reco::Hit2DToClusterMap::const_iterator hit2DToClusIter = hit2DToClusterMap.find(hit2D);
230 
231  if (hit2DToClusIter != hit2DToClusterMap.end()) {
232  sharedPlaneHitCountMapVec[hitPlane][hit2D]++;
233  }
234  else
235  uniquePlaneHitCountMapVec[hitPlane][hit2D]++;
236  }
237  }
238 
239  // First try... look at fractions of unique hits each plane
240  std::vector<float> uniqueFractionVec(3, 0.);
241 
242  for (size_t idx = 0; idx < 3; idx++) {
243  if (!totalPlaneHitCountMapVec[idx].empty())
244  uniqueFractionVec[idx] = float(uniquePlaneHitCountMapVec[idx].size()) /
245  float(totalPlaneHitCountMapVec[idx].size());
246  }
247 
248  float overallFraction = uniqueFractionVec[0] * uniqueFractionVec[1] * uniqueFractionVec[2];
249  float maxFraction = *std::max_element(uniqueFractionVec.begin(), uniqueFractionVec.end());
250 
251  if (maxFraction > 0.9 || overallFraction > 0.2) keepThisCluster = true;
252 
253  return keepThisCluster;
254  }
255 
257  reco::Hit2DToClusterMap& hit2DToClusterMap) const
258  {
259  // See if we can avoid duplicates by temporarily transferring to a set
260  std::unordered_set<const reco::ClusterHit2D*> hitSet;
261 
262  // first task is to mark the hits and update the hit to cluster mapping
263  for (const auto& hit3D : clusterParams.getHitPairListPtr()) {
264  for (const auto& hit2D : hit3D->getHits()) {
265  if (!hit2D) continue;
266 
267  hit2DToClusterMap[hit2D][&clusterParams].insert(hit3D);
268  hitSet.insert(hit2D);
269  }
270  }
271 
272  // First stage of feature extraction runs here
273  m_pcaAlg.PCAAnalysis_3D(clusterParams.getHitPairListPtr(), clusterParams.getFullPCA());
274 
275  // Must have a valid pca
276  if (clusterParams.getFullPCA().getSvdOK()) {
277  // Set the skeleton PCA to make sure it has some value
278  clusterParams.getSkeletonPCA() = clusterParams.getFullPCA();
279 
280  // Add the "good" hits to our cluster parameters
281  for (const auto& hit2D : hitSet) {
282  hit2D->setStatusBit(reco::ClusterHit2D::USED);
283  clusterParams.UpdateParameters(hit2D);
284  }
285  }
286 
287  return;
288  }
289 
291  reco::Hit2DToClusterMap& /* hit2DToClusterMap */,
292  double /* minUniqueFrac */,
293  double /* maxLostFrac */) const
294  {
299  // Recover the HitPairListPtr from the input clusterParams (which will be the
300  // only thing that has been provided)
301  reco::HitPairListPtr& hitPairVector = clusterParams.getHitPairListPtr();
302 
303  // To be sure, we should clear the other data members
304  clusterParams.getClusterParams().clear();
305  clusterParams.getFullPCA() = reco::PrincipalComponents();
306 
307  // A test of the emergency broadcast system...
308  // FindBestPathInCluster(clusterParams);
309  // CheckHitSorting(clusterParams);
310 
311  // See if we can avoid duplicates by temporarily transferring to a set
312  std::set<const reco::ClusterHit2D*> hitSet;
313 
314  // Ultimately we want to keep track of the number of unique 2D hits in this cluster
315  // So use a vector (by plane) of sets of hits
316  std::vector<size_t> planeHit2DVec;
317  std::vector<size_t> planeUniqueHit2DVec;
318 
319  planeHit2DVec.resize(3);
320  planeUniqueHit2DVec.resize(3);
321 
322  // Map from 2D hits to associated 3D hits
323  reco::Hit2DToHit3DListMap& hit2DToHit3DListMap = clusterParams.getHit2DToHit3DListMap();
324 
325  // The map from 2D to 3D hits will contain unique entries for 2D hits so we can do some quick accounting here
326  for (const auto& hitMapPair : hit2DToHit3DListMap) {
327  size_t plane = hitMapPair.first->WireID().Plane;
328 
329  planeHit2DVec[plane] += hitMapPair.second.size();
330  if (!(hitMapPair.first->getStatusBits() & reco::ClusterHit2D::USED))
331  planeUniqueHit2DVec[plane] += hitMapPair.second.size();
332  }
333 
334  // Get totals
335  int numTotalHits(0);
336  int numUniqueHits(0);
337 
338  // Also consider the number of hits shared on a given view...
339  std::vector<float> uniqueHitFracVec(3, 0.);
340  int nPlanesWithHits(0);
341  int nPlanesWithUniqueHits(0);
342  size_t minPlane(0);
343  size_t minPlaneCnt = planeUniqueHit2DVec[0];
344 
345  // Loop through the planes
346  for (int idx = 0; idx < 3; idx++) {
347  // numerology
348  numTotalHits += planeHit2DVec[idx];
349  numUniqueHits += planeUniqueHit2DVec[idx];
350 
351  if (planeHit2DVec[idx] > 0) nPlanesWithHits++;
352  if (planeUniqueHit2DVec[idx] > 0) nPlanesWithUniqueHits++;
353 
354  // Compute the fraction of unique hits in this plane
355  uniqueHitFracVec[idx] =
356  float(planeUniqueHit2DVec[idx]) / std::max(float(planeHit2DVec[idx]), float(1.));
357 
358  // Finding the plane with the fewest hits
359  if (planeHit2DVec[idx] < minPlaneCnt) {
360  minPlaneCnt = planeHit2DVec[idx];
361  minPlane = idx;
362  }
363  }
364 
365  // If we have something left then at this point we make one more check
366  // This check is intended to weed out clusters made from isolated groups of ambiguous hits which
367  // really belong to a larger cluster
368  if (numUniqueHits > 0.25 * numTotalHits && nPlanesWithHits > 1 && nPlanesWithUniqueHits > 1) {
369  // Sorts lowest to highest
370  std::sort(uniqueHitFracVec.begin(), uniqueHitFracVec.end());
371 
372  float acceptRatio = 0.;
373 
374  if (uniqueHitFracVec[0] * uniqueHitFracVec[1] > 0.25) acceptRatio = 1.;
375 
376  float uniqueFraction = uniqueHitFracVec[0] * uniqueHitFracVec[1] * uniqueHitFracVec[2];
377 
378  // Arbitrary rejection criteria... need to understand
379  if (uniqueFraction > 0.6 && acceptRatio > 0.) {
380  // Create a list to hold 3D hits which are already in use (criteria below)
381  reco::HitPairListPtr usedHitPairList;
382 
383  // std::cout << "--------> Starting 3D hit removal, # 2D hits/plane: " << planeHit2DVec[0] << "/" << planeHit2DVec[1] << "/" << planeHit2DVec[2] << std::endl;
384 
385  // Have survived laugh test, do final processing...
386  // In this first loop go through all the 2D hits and identify the 3D hits that are candidates for deletion
387  for (auto& pair : hit2DToHit3DListMap) {
388  // Check to see if this can happen
389  if (pair.second.empty()) {
390  std::cout << "<<<<<< no matching 3D hits for reco hit in final hit processing >>>>>>"
391  << std::endl;
392  continue;
393  }
394 
395  // Which plane for this hit?
396  size_t hitPlane = pair.first->WireID().Plane;
397 
398  // Only reject hits on the planes not the fewest 2D hits and really only do this if more than a couple
399  if (hitPlane != minPlane && pair.second.size() > 2) {
400  // If this hit is associated to a number of 3D hits then do some arbitration
401  // Start by sorting the 3D hits by "significance"
402  // --> Really should do this by the significance of adding the hit we are looking at?
403  //pair.second.sort([hitPlane](const auto& left, const auto& right){return left->getHitDelTSigVec()[hitPlane] < right->getHitDelTSigVec()[hitPlane];});
404  pair.second.sort([](const auto& left, const auto& right) {
405  return left->getHitChiSquare() < right->getHitChiSquare();
406  });
407 
409  //std::cout << "~~~~> Checking hit removal, # matches: " << pair.second.size() << ", first params: " << pair.second.front()->getHitChiSquare() << ", last params: "<< pair.second.back()->getHitChiSquare();
410 
411  // From sorted list, determine a rejection value to eliminate bad hits
412  //float cutDeltaTSig = std::min(2.0,std::max(0.5, double((pair.second.front()->getHitDelTSigVec()[hitPlane]))));
413  float cutDeltaTSig =
414  std::min(2.0, std::max(0.5, double(pair.second.front()->getHitChiSquare())));
415 
416  //std::cout << ", cutDeltaTSig: " << cutDeltaTSig;
417 
418  cutDeltaTSig = 10.;
419 
420  // And here go through the process of eliminating it
421  //reco::HitPairListPtr::iterator firstBadHitItr = std::find_if(pair.second.begin(),pair.second.end(),[hitPlane,cutDeltaTSig](const auto& hitPtr){return hitPtr->getHitDelTSigVec()[hitPlane] > cutDeltaTSig;});
422  reco::HitPairListPtr::iterator firstBadHitItr = std::find_if(
423  pair.second.begin(), pair.second.end(), [cutDeltaTSig](const auto& hitPtr) {
424  return hitPtr->getHitChiSquare() > cutDeltaTSig;
425  });
426 
427  // We need to worry about cutting too many hits... use this loop to try to expand the range in a reasonable fashion
428  // while(std::distance(pair.second.begin(),firstBadHitItr) < int(pair.second.size()/3) && cutDeltaTSig < 0.5)
429  // {
430  // float candDeltaTSig = (*firstBadHitItr)->getHitDelTSigVec()[hitPlane];
431  //
432  // if (candDeltaTSig > 2. * cutDeltaTSig) break;
433  //
434  // firstBadHitItr++;
435  // cutDeltaTSig = candDeltaTSig;
436  // }
437 
438  reco::HitPairListPtr rejectCandList;
439 
440  std::copy(firstBadHitItr, pair.second.end(), std::back_inserter(rejectCandList));
441 
442  //std::cout << ", bad hits: " << rejectCandList.size() << std::endl;
443 
444  // Remove the 3D hits from all the lists
445  for (const auto& hit3D : rejectCandList) {
446  bool rejectThisHit(true);
447  std::vector<std::pair<reco::HitPairListPtr&, reco::HitPairListPtr::iterator>>
448  deleteVec;
449 
450  for (const auto& hit2D : hit3D->getHits()) {
451  // Watch for null hit (dead channels)
452  if (!hit2D) continue;
453 
454  reco::HitPairListPtr& removeHitList = hit2DToHit3DListMap[hit2D];
455 
456  // Don't allow all the 3D hits associated to this 2D hit to be rejected?
457  if (removeHitList.size() < 2) {
458  //std::cout << " ---> remove list too small, size: " << removeHitList.size() << " for hit: " << hit2D << ", pair.first: " << pair.first << std::endl;
459  rejectThisHit = false;
460  break;
461  }
462 
464  std::find(removeHitList.begin(), removeHitList.end(), hit3D);
465 
466  if (removeItr != removeHitList.end())
467  deleteVec.emplace_back(removeHitList, removeItr);
468  //else std::cout << "======>> Did not find 3D hit to remove from list for 2D hit! <<+++++++++" << std::endl;
469  }
470 
471  if (rejectThisHit) {
472  for (auto& rejectPair : deleteVec)
473  rejectPair.first.erase(rejectPair.second);
474 
475  usedHitPairList.push_back(hit3D);
476  }
477  }
478  }
479 
480  hitSet.insert(pair.first);
481  }
482 
483  // Now we go through the list of candidates and delete those which are unworthy of being processed...
484  if (!usedHitPairList.empty()) {
485  // Loop through the hits watching out for double counting
486  const reco::ClusterHit3D* lastHit3D = 0;
487 
488  for (const auto& hit3D : usedHitPairList) {
489  if (hit3D == lastHit3D) continue;
490 
492  std::find(hitPairVector.begin(), hitPairVector.end(), hit3D);
493 
494  if (hit3DItr != hitPairVector.end()) {
495  // Mark the hit
496  hit3D->setStatusBit(reco::ClusterHit3D::REJECTEDHIT);
497 
498  // Remove from the cluster's hit container
499  hitPairVector.erase(hit3DItr);
500 
501  // If the clustering algorithm includes edges then need to get rid of those as well
502  if (!clusterParams.getHit3DToEdgeMap().empty()) {
503  reco::Hit3DToEdgeMap& edgeMap = clusterParams.getHit3DToEdgeMap();
504 
505  edgeMap.erase(edgeMap.find(hit3D));
506  }
507  }
508 
509  lastHit3D = hit3D;
510  }
511  // removeUsedHitsFromMap(clusterParams, usedHitPairList, hit2DToClusterMap);
512  }
513 
514  // First stage of feature extraction runs here
515  m_pcaAlg.PCAAnalysis_3D(hitPairVector, clusterParams.getFullPCA());
516 
517  // Must have a valid pca
518  if (clusterParams.getFullPCA().getSvdOK()) {
519  // Set the skeleton PCA to make sure it has some value
520  clusterParams.getSkeletonPCA() = clusterParams.getFullPCA();
521 
522  // Add the "good" hits to our cluster parameters
523  for (const auto& hit2D : hitSet) {
524  hit2D->setStatusBit(reco::ClusterHit2D::USED);
525  clusterParams.UpdateParameters(hit2D);
526  }
527  }
528  }
529  }
530 
531  return;
532  }
533 
535  reco::HitPairListPtr& usedHitPairList,
536  reco::Hit2DToClusterMap& hit2DToClusterMap) const
537  {
538  // Clean up our hit to cluster map
539  for (const auto& hit3D : usedHitPairList) {
540  for (const auto& hit2D : hit3D->getHits()) {
541  if (!hit2D) continue;
542 
543  reco::Hit2DToClusterMap::iterator hitToClusMapItr = hit2DToClusterMap.find(hit2D);
544 
545  // I am pretty sure this can't happen but let's check anyway...
546  if (hitToClusMapItr == hit2DToClusterMap.end()) {
547  std::cout << "*********** COULD NOT FIND ENTRY FOR 2D HIT! **************" << std::endl;
548  break;
549  }
550 
551  // Ok, the same hit can be shared in the same cluster so must be careful here
552  // First loop over clusters looking for match
553  reco::ClusterToHitPairSetMap::iterator clusToHit3DMapItr =
554  hitToClusMapItr->second.find(&clusterParams);
555 
556  // This also can't happen
557  if (clusToHit3DMapItr == hitToClusMapItr->second.end()) {
558  std::cout << "************ DUCK! THE SKY HAS FALLEN!! *********" << std::endl;
559  break;
560  }
561 
562  // If this hit is shared by more than one 3D hit then pick the right one
563  if (clusToHit3DMapItr->second.size() > 1) {
564  reco::HitPairSetPtr::iterator hit3DItr = clusToHit3DMapItr->second.find(hit3D);
565 
566  clusToHit3DMapItr->second.erase(hit3DItr);
567  }
568  else
569  hitToClusMapItr->second.erase(clusToHit3DMapItr);
570  }
571  }
572 
573  return;
574  }
575 
577 } // namespace lar_cluster3d
bool keepThisCluster(reco::ClusterParameters &, const reco::Hit2DToClusterMap &) const
Is a cluster "good" and worth keeping?
intermediate_table::iterator iterator
void configure(const fhicl::ParameterSet &) override
bool getSvdOK() const
Definition: Cluster3D.h:240
ClusterParamsBuilder(fhicl::ParameterSet const &pset)
Constructor.
constexpr auto const & right(const_AssnsIter< L, R, D, Dir > const &a, const_AssnsIter< L, R, D, Dir > const &b)
Definition: AssnsIter.h:102
#define DEFINE_ART_CLASS_TOOL(tool)
Definition: ToolMacros.h:42
void PCAAnalysis_3D(const reco::HitPairListPtr &hitPairList, reco::PrincipalComponents &pca, bool skeletonOnly=false) const
void BuildClusterInfo(reco::ClusterParametersList &clusterParametersList) const override
Given the results of running DBScan, format the clusters so that they can be easily transferred back ...
reco::PrincipalComponents & getSkeletonPCA()
Definition: Cluster3D.h:465
reco::Hit2DToHit3DListMap & getHit2DToHit3DListMap()
Definition: Cluster3D.h:462
Hit has been rejected for any reason.
Definition: Cluster3D.h:96
reco::Hit3DToEdgeMap & getHit3DToEdgeMap()
Definition: Cluster3D.h:466
intermediate_table::const_iterator const_iterator
reco::HitPairListPtr & getHitPairListPtr()
Definition: Cluster3D.h:463
reco::PlaneToClusterParamsMap & getClusterParams()
Definition: Cluster3D.h:461
void removeUsedHitsFromMap(reco::ClusterParameters &, reco::HitPairListPtr &, reco::Hit2DToClusterMap &) const
decltype(auto) constexpr size(T &&obj)
ADL-aware version of std::size.
Definition: StdUtils.h:101
size_t m_clusterMinHits
Data members to follow.
reco::PrincipalComponents & getFullPCA()
Definition: Cluster3D.h:464
ClusterParamsBuilder class definiton.
void storeThisCluster(reco::ClusterParameters &, reco::Hit2DToClusterMap &) const
parameter set interface
std::unordered_map< const reco::ClusterHit2D *, ClusterToHitPairSetMap > Hit2DToClusterMap
Definition: Cluster3D.h:499
T get(std::string const &key) const
Definition: ParameterSet.h:314
std::unordered_map< const reco::ClusterHit2D *, reco::HitPairListPtr > Hit2DToHit3DListMap
Definition: Cluster3D.h:340
std::list< const reco::ClusterHit3D * > HitPairListPtr
Definition: Cluster3D.h:326
void UpdateParameters(const reco::ClusterHit2D *hit)
Definition: Cluster3D.h:440
Definition of data types for geometry description.
This header file defines the interface to a principal components analysis designed to be used within ...
constexpr auto const & left(const_AssnsIter< L, R, D, Dir > const &a, const_AssnsIter< L, R, D, Dir > const &b)
Definition: AssnsIter.h:94
decltype(auto) get(T &&obj)
ADL-aware version of std::to_string.
Definition: StdUtils.h:120
ClusterParamsBuilder class definiton.
std::unordered_map< const reco::ClusterHit3D *, reco::EdgeList > Hit3DToEdgeMap
Definition: Cluster3D.h:339
void FillClusterParams(reco::ClusterParameters &, reco::Hit2DToClusterMap &, double, double) const override
Fill the cluster parameters (expose to outside world for case of splitting/merging clusters) ...
std::list< ClusterParameters > ClusterParametersList
Definition: Cluster3D.h:393
decltype(auto) constexpr empty(T &&obj)
ADL-aware version of std::empty.
Definition: StdUtils.h:109