LArSoft  v09_90_00
Liquid Argon Software toolkit - https://larsoft.org/
DBScanAlg.cxx
Go to the documentation of this file.
1 //
3 // \file DBScanAlg.cxx
4 //
5 // kinga.partyka@yale.edu
6 //
7 // This algorithm finds clusters of hits, they can be of arbitrary shape.You need to specify 2(3) parameters:
8 // epsilon, epsilon2 and MinPoints as explained in the corresponding xml file.In my comments a 'point' reference
9 // appears quite often. A 'point' is basically a simple hit which only contains wire and time information. This
10 // algorithm is based on DBSCAN(Density Based Spatial Clustering of Applications with Noise): M. Ester, H.-P. Kriegel,
11 // J. Sander, and X. Xu, A density-based algorithm for discovering clusters in large spatial databases with noise,
12 // Second International Conference on Knowledge Discovery and Data Mining, pp. 226-231, AAAI Press. 1996.
13 // ( Some of this code is from "Antonio Gulli's coding playground")
15 
16 //Framework includes:
18 #include "fhiclcpp/ParameterSet.h"
20 
21 #include "RStarTree/RStarBoundingBox.h"
24 #include "larcorealg/CoreUtils/NumericUtils.h" // util::absDiff()
29 
30 #include <cmath>
31 #include <cstdlib>
32 
33 //----------------------------------------------------------
34 // RStarTree stuff
35 //----------------------------------------------------------
37 {
38  BoundingBox bb;
39  bb.edges[0].first = x - std::abs(dx);
40  bb.edges[0].second = x + std::abs(dx);
41 
42  bb.edges[1].first = y - std::abs(dy);
43  bb.edges[1].second = y + std::abs(dy);
44  return bb;
45 }
46 
47 //----------------------------------------------------------
48 // Set Visitor
49 //
50 // collects accepted leafs in the std::set sResult and the std::vector
51 // vResult
52 struct Visitor {
53  unsigned int count;
54  std::vector<unsigned int> vResult;
55  std::set<unsigned int> sResult;
56  const bool ContinueVisiting;
57  Visitor() : count(0), vResult(), sResult(), ContinueVisiting(true){};
58  void operator()(const RTree::Leaf* const leaf)
59  {
60  vResult.push_back(leaf->leaf);
61  sResult.insert(leaf->leaf);
62  count++;
63  }
64 };
65 
66 //----------------------------------------------------------
67 // Ellipse acceptor
68 //
69 // Roughly quivalent to what FindNeighbors was doing before, except
70 // that it doesn't handle dead wires
71 struct AcceptEllipse {
73  double r[2];
74  double c[2];
75  double d[2];
76  explicit AcceptEllipse(const BoundingBox& b, double r1, double r2) : m_bound(b), r(), c()
77  {
78  r[0] = r1;
79  r[1] = r2;
80  c[0] = (m_bound.edges[0].second + m_bound.edges[0].first) / 2.0;
81  c[1] = (m_bound.edges[1].second + m_bound.edges[1].first) / 2.0;
82  d[0] = (m_bound.edges[0].second - m_bound.edges[0].first) / 2.0;
83  d[1] = (m_bound.edges[1].second - m_bound.edges[1].first) / 2.0;
84  }
85  bool operator()(const RTree::Node* const node) const
86  {
87  // At the node level we use a rectangualr overlap condition
88  return m_bound.overlaps(node->bound);
89  }
90  bool operator()(const RTree::Leaf* const leaf) const
91  {
92  // At the leaf level we have to more careful
93  double C[2], D[2];
94  C[0] = (leaf->bound.edges[0].second + leaf->bound.edges[0].first) / 2.0;
95  C[1] = (leaf->bound.edges[1].second + leaf->bound.edges[1].first) / 2.0;
96  D[0] = (leaf->bound.edges[0].second - leaf->bound.edges[0].first) / 2.0;
97  D[1] = (leaf->bound.edges[1].second - leaf->bound.edges[1].first) / 2.0;
98  double t = 0;
99  for (int i = 0; i < 2; ++i) {
100  // This is only approximate, it will accept a few classes of
101  // non-overlapping ellipses
102  t += ((c[i] - C[i]) * (c[i] - C[i])) / ((d[i] + D[i]) * (d[i] + D[i]));
103  }
104  return (t < 1);
105  }
106 
107 private:
108  static const BoundingBox EmptyBoundingBox; // for uninitialized bounds
109 
110  AcceptEllipse() : m_bound(EmptyBoundingBox), r(), c()
111  {
112  r[0] = r[1] = 1.0;
113  c[0] = (m_bound.edges[0].second - m_bound.edges[0].first) / 2.0;
114  c[1] = (m_bound.edges[1].second - m_bound.edges[1].first) / 2.0;
115  }
116 };
117 
119 
120 //----------------------------------------------------------
121 // FindNeighbors acceptor
122 //
123 // Exactly equivalent to what FindNeighbors was doing before (assuming
124 // that points are preresented with no width in wire-space and with
125 // width in time-space
126 //
127 // We're going to make this work with nodal bounding boxes by
128 // accepting on center-inside the box or comparing to the nearest
129 // point on the edge using the point-to-point comparison function and
130 // a the maximum time-width.
133  double fEps[2];
134  double fMaxWidth;
135  double fWireDist;
136  std::vector<unsigned int>& fBadWireSum;
138  double eps,
139  double eps2,
140  double maxWidth,
141  double wireDist,
142  std::vector<unsigned int>& badWireSum)
143  : fBound(b), fEps(), fMaxWidth(maxWidth), fWireDist(wireDist), fBadWireSum(badWireSum)
144  {
145  fEps[0] = eps;
146  fEps[1] = eps2;
147  }
148  // return a point-like box at the center of b
150  {
151  BoundingBox c;
152  c.edges[0].first = c.edges[0].second = (b.edges[0].first + b.edges[0].second) / 2.0;
153  c.edges[1].first = c.edges[1].second = (b.edges[1].first + b.edges[1].second) / 2.0;
154  return c;
155  }
156  BoundingBox center() const { return center(fBound); }
157  // Here we implement the findNeighbor algorithm from point to point
158  bool isNear(const BoundingBox& b) const
159  {
160  // Precomupation of a few things...box centers, wire bridging
161  // quantities, etc...
162  double bCenter0 = center(b).edges[0].first;
163  double bCenter1 = center(b).edges[1].first;
164  double tCenter0 = center().edges[0].first; // "t" is for test-point
165  double tCenter1 = center().edges[1].first;
166  // widths in the time direction
167  double bWidth = std::abs(b.edges[1].second - b.edges[1].first);
168  double tWidth = std::abs(fBound.edges[1].second - fBound.edges[1].first);
169  // bad channel counting
170  unsigned int wire1 = (unsigned int)(tCenter0 / fWireDist + 0.5);
171  unsigned int wire2 = (unsigned int)(bCenter0 / fWireDist + 0.5);
172  // Clamp the wize number to something resonably.
174  if (wire1 < fBadWireSum.size()) wire1 = fBadWireSum.size();
175  if (wire2 < fBadWireSum.size()) wire2 = fBadWireSum.size();
176  // The getSimilarity[2] wirestobridge calculation is asymmetric,
177  // but is plugged into the cache symmetrically.I am assuming that
178  // this is OK because the wires that are hit cannot be bad.
179  unsigned int wirestobridge = lar::util::absDiff(fBadWireSum[wire1], fBadWireSum[wire2]);
180  double cmtobridge = wirestobridge * fWireDist;
181 
182  double sim = std::abs(tCenter0 - bCenter0) - cmtobridge;
183  sim *= sim; // square it
184 
185  if (std::abs(tCenter0 - bCenter0) > 1e-10) {
186  cmtobridge *= std::abs((tCenter1 - bCenter1) / (tCenter0 - bCenter0));
187  }
188  double sim2 = std::abs(tCenter1 - bCenter1) - cmtobridge;
189  sim2 *= sim2; // square it
190 
191  double k = 0.1;
192  double WFactor = (exp(4.6 * ((tWidth * tWidth) + (bWidth * bWidth)))) * k; // ??
193  // We clamp WFactor on [ 1.0, 6.25 ]
194  if (WFactor < 1.0) WFactor = 1.0;
195  if (WFactor > 6.25) WFactor = 6.25;
196 
197  // Now we implement the test...see FindNeighbors
198  return (((sim) / (fEps[0] * fEps[0])) + ((sim2) / (fEps[1] * fEps[1] * (WFactor))) <= 1);
199  }
201  {
202  BoundingBox n;
203  BoundingBox c = center();
204  for (int i = 0; i < 2; ++i) {
205  // The work for finding the nearest point is the same in both
206  // dimensions
207  if (b.edges[i].first > c.edges[i].second) {
208  // Our point is lower than the low edge of the box
209  n.edges[i].first = n.edges[i].second = b.edges[i].first;
210  }
211  else if (b.edges[0].second < c.edges[0].first) {
212  // Our point is higher than the high edge of the box
213  n.edges[i].first = n.edges[i].second = b.edges[i].second;
214  }
215  else {
216  // In this dimension our point lies within the boxes bounds
217  n.edges[i].first = n.edges[i].second = c.edges[i].first;
218  }
219  }
220  // Now give the time dimension a width
221  n.edges[1].first -= fMaxWidth / 2.0;
222  n.edges[1].second += fMaxWidth / 2.0;
223  return n;
224  }
225  bool operator()(const RTree::Node* const node) const
226  {
227  // if the our point overlaps the bounding box, accept immediately
228  if (fBound.overlaps(node->bound)) return true;
229  // No overlap, so compare to the nearest point on the bounding box
230  // under the assumption that the maximum width applies for that
231  // point
232  return isNear(nearestPoint(node->bound));
233  }
234  bool operator()(const RTree::Leaf* const leaf) const { return isNear(leaf->bound); }
235 };
236 
237 namespace cluster {
238  const unsigned int kNO_CLUSTER = UINT_MAX;
239  const unsigned int kNOISE_CLUSTER = UINT_MAX - 1;
240 }
241 
242 //----------------------------------------------------------
243 // DBScanAlg stuff
244 //----------------------------------------------------------
246 {
247  fEps = p.get<double>("eps");
248  fEps2 = p.get<double>("epstwo");
249  fMinPts = p.get<int>("minPts");
250  fClusterMethod = p.get<int>("Method");
251  fDistanceMetric = p.get<int>("Metric");
252 }
253 
254 //----------------------------------------------------------
256  const detinfo::DetectorPropertiesData& detProp,
257  const std::vector<art::Ptr<recob::Hit>>& allhits,
258  std::set<uint32_t> badChannels,
259  const std::vector<geo::WireID>& wireids)
260 {
261  if (wireids.size() && wireids.size() != allhits.size()) {
262  throw cet::exception("DBScanAlg") << "allhits size = " << allhits.size()
263  << " wireids size = " << wireids.size() << " do not match\n";
264  }
265  // clear all the data member vectors for the new set of hits
266  fps.clear();
267  fpointId_to_clusterId.clear();
268  fnoise.clear();
269  fvisited.clear();
270  fsim.clear();
271  fsim2.clear();
272  fsim3.clear();
273  fclusters.clear();
274  fWirePitch.clear();
275 
276  fBadChannels = badChannels;
277  fBadWireSum.clear();
278 
279  // Clear the RTree
280  fRTree.Remove(RTree::AcceptAny(), RTree::RemoveLeaf());
281  // and the bounds list
282  fRect.clear();
283 
284  //------------------------------------------------------------------
285  // Determine spacing between wires (different for each detector)
287 
289  constexpr geo::TPCID tpcid{0, 0};
290  for (auto const& plane : geom->Iterate<geo::PlaneGeo>(tpcid))
291  fWirePitch.push_back(plane.WirePitch());
292 
293  // Collect the bad wire list into a useful form
294  if (fClusterMethod) { // Using the R*-tree
295  fBadWireSum.resize(geom->Nchannels());
296  unsigned int count = 0;
297  for (unsigned int i = 0; i < fBadWireSum.size(); ++i) {
298  count += fBadChannels.count(i);
299  fBadWireSum[i] = count;
300  }
301  }
302 
303  // Collect the hits in a useful form,
304  // and take note of the maximum time width
305  fMaxWidth = 0.0;
306  for (unsigned int j = 0; j < allhits.size(); ++j) {
307  int dims = 3; //our point is defined by 3 elements:wire#,center of the hit, and the hit width
308  std::vector<double> p(dims);
309 
310  double tickToDist = detProp.DriftVelocity(detProp.Efield(), detProp.Temperature());
311  tickToDist *= 1.e-3 * sampling_rate(clockData); // 1e-3 is conversion of 1/us to 1/ns
312  if (!wireids.size())
313  p[0] = (allhits[j]->WireID().Wire) * fWirePitch[allhits[j]->WireID().Plane];
314  else
315  p[0] = (wireids[j].Wire) * fWirePitch[allhits[j]->WireID().Plane];
316  p[1] = allhits[j]->PeakTime() * tickToDist;
317  p[2] = 2. * allhits[j]->RMS() * tickToDist; //width of a hit in cm
318 
319  // check on the maximum width condition
320  if (p[2] > fMaxWidth) fMaxWidth = p[2];
321 
322  fps.push_back(p);
323 
324  if (fClusterMethod) { // Using the R*-tree
325  // Convert these same values into dbsPoints to feed into the R*-tree
326  dbsPoint pp(p[0], p[1], 0.0, p[2] / 2.0); // note dividing by two
327  fRTree.Insert(j, pp.bounds());
328  // Keep a parallel list already made up. We could use fps instead, but...
329  fRect.push_back(pp);
330  }
331  }
332 
333  fpointId_to_clusterId.resize(fps.size(), kNO_CLUSTER); // Not zero as before!
334  fnoise.resize(fps.size(), false);
335  fvisited.resize(fps.size(), false);
336 
337  if (fClusterMethod) { // Using the R*-tree
338  Visitor visitor = fRTree.Query(RTree::AcceptAny(), Visitor());
339  mf::LogInfo("DBscan") << "InitScan: hits RTree loaded with " << visitor.count << " items.";
340  }
341  mf::LogInfo("DBscan") << "InitScan: hits vector size is " << fps.size();
342 
343  return;
344 }
345 
346 //----------------------------------------------------------
347 double cluster::DBScanAlg::getSimilarity(const std::vector<double> v1, const std::vector<double> v2)
348 {
349 
350  //for Euclidean distance comment everything out except this-->>>
351  // return std::sqrt((v2[1]-v1[1])*(v2[1]-v1[1])+(v2[0]-v1[0])*(v2[0]-v1[0]));
352  //------------------------------------------------------------------------
353  // return std::abs( v2[0]-v1[0]); //for rectangle
354  //----------------------------------------------------------------------
355  //Manhattan distance:
356  //return std::abs(v1[0]-v2[0])+std::abs(v1[1]-v2[1]);
357 
359  double wire_dist = fWirePitch[0];
360 
361  unsigned int wire1 =
362  (unsigned int)(v1[0] / wire_dist + 0.5); //to make sure to get desired integer
363  unsigned int wire2 = (unsigned int)(v2[0] / wire_dist + 0.5);
364  int wirestobridge = 0;
365 
366  if (wire1 > wire2) {
367  unsigned int wire = wire1;
368  wire1 = wire2;
369  wire2 = wire;
370  }
371 
372  for (unsigned int i = wire1; i < wire2; i++) {
373  if (fBadChannels.find(i) != fBadChannels.end()) wirestobridge++;
374  }
375 
376  double cmtobridge = wirestobridge * wire_dist;
377  //---------------------------------------------------------------------
378  return ((std::abs(v2[0] - v1[0]) - cmtobridge) *
379  (std::abs(v2[0] - v1[0]) - cmtobridge)); //for ellipse
380 }
381 
382 //----------------------------------------------------------------
383 double cluster::DBScanAlg::getSimilarity2(const std::vector<double> v1,
384  const std::vector<double> v2)
385 {
386 
387  //-------------------------------------------
388  //return std::abs( v2[1]-v1[1]);//for rectangle
389  //------------------------------------------
390 
392  double wire_dist = fWirePitch[0];
393 
394  unsigned int wire1 =
395  (unsigned int)(v1[0] / wire_dist + 0.5); //to make sure to get desired integer
396  unsigned int wire2 = (unsigned int)(v2[0] / wire_dist + 0.5);
397  int wirestobridge = 0;
398 
399  if (wire1 > wire2) {
400  unsigned int wire = wire1;
401  wire1 = wire2;
402  wire2 = wire;
403  }
404 
405  for (unsigned int i = wire1; i < wire2; i++) {
406  if (fBadChannels.find(i) != fBadChannels.end()) wirestobridge++;
407  }
408 
409  double cmtobridge = wirestobridge * wire_dist;
410 
411  if (std::abs(v2[0] - v1[0]) > 1e-10) {
412  cmtobridge *= std::abs((v2[1] - v1[1]) / (v2[0] - v1[0]));
413  }
414  else
415  cmtobridge = 0;
416 
417  return ((std::abs(v2[1] - v1[1]) - cmtobridge) *
418  (std::abs(v2[1] - v1[1]) - cmtobridge)); //for ellipse
419 }
420 
421 //----------------------------------------------------------------
422 double cluster::DBScanAlg::getWidthFactor(const std::vector<double> v1,
423  const std::vector<double> v2)
424 {
425 
426  //double k=0.13; //this number was determined by looking at flat muon hits' widths.
427  //The average width of these hits in cm is 0.505, so 4*2*(w1^2)=2.04
428  //where w1=w2=0.505, e^2.044= 7.69. In order not to change the distance
429  //in time direction of the ellipse we want to make it equal to 1 for
430  //these hits. Thus the k factor is k=1/7.69=0.13//for coeff=4
431 
432  //..................................................
433  double k = 0.1; //for 4.5 coeff
434  double WFactor = (exp(4.6 * ((v1[2] * v1[2]) + (v2[2] * v2[2])))) * k;
435  //........................................................
436  // Let's try something different:
437  if (WFactor > 1) {
438  if (WFactor < 6.25)
439  return WFactor; //remember that we are increasing the distance in
440  //eps2 as std::sqrt of this number (i.e std::sqrt(6.25))
441  else
442  return 6.25;
443  }
444  else
445  return 1.0;
446 }
447 
448 //----------------------------------------------------------------
449 //\todo this is O(n) in the number of hits, while the high performance
450 // claimed for DBSCAN relies on it being O(log n)!
451 std::vector<unsigned int> cluster::DBScanAlg::findNeighbors(unsigned int pid,
452  double threshold,
453  double threshold2)
454 {
455  std::vector<unsigned int> ne;
456 
457  for (int unsigned j = 0; j < fsim.size(); j++) {
458  if ((pid != j) &&
459  (((fsim[pid][j]) / (threshold * threshold)) +
460  ((fsim2[pid][j]) / (threshold2 * threshold2 * (fsim3[pid][j])))) < 1) { //ellipse
461  ne.push_back(j);
462  }
463  } // end loop over fsim
464 
465  return ne;
466 }
467 
468 //-----------------------------------------------------------------
470 {
471  int size = fps.size();
472  fsim.resize(size, std::vector<double>(size));
473  for (int i = 0; i < size; i++) {
474  for (int j = i + 1; j < size; j++) {
475  fsim[j][i] = fsim[i][j] = getSimilarity(fps[i], fps[j]);
476  }
477  }
478 }
479 
480 //------------------------------------------------------------------
482 {
483  int size = fps.size();
484  fsim2.resize(size, std::vector<double>(size));
485  for (int i = 0; i < size; i++) {
486  for (int j = i + 1; j < size; j++) {
487  fsim2[j][i] = fsim2[i][j] = getSimilarity2(fps[i], fps[j]);
488  }
489  }
490 }
491 
492 //------------------------------------------------------------------
494 {
495  int size = fps.size();
496  fsim3.resize(size, std::vector<double>(size));
497 
498  for (int i = 0; i < size; i++) {
499  for (int j = i + 1; j < size; j++) {
500  fsim3[j][i] = fsim3[i][j] = getWidthFactor(fps[i], fps[j]);
501  }
502  }
503 }
504 
505 //----------------------------------------------------------------
507 // This is the algorithm that finds clusters:
508 // Run the selected clustering algorithm
510 {
511  switch (fClusterMethod) {
512  case 2: return run_dbscan_cluster();
513  case 1: return run_FN_cluster();
514  default:
515  computeSimilarity(); // watch out for this, they are *slow*
516  computeSimilarity2(); // "
517  computeWidthFactor(); // "
518  return run_FN_naive_cluster();
519  }
520 }
521 
522 //----------------------------------------------------------------
524 // This is the algorithm that finds clusters:
525 //
526 // DWM's implementation of DBScanAlg as much like the paper as possible
528 {
529  unsigned int cid = 0;
530  // foreach pid
531  for (size_t pid = 0; pid < fps.size(); pid++) {
532  // not already visited
533  if (fpointId_to_clusterId[pid] == kNO_CLUSTER) {
534  if (ExpandCluster(pid, cid)) { cid++; }
535  } // if (!visited
536  } // for
537  // END DBSCAN
538 
539  // Construct clusters, count noise, etc..
540  int noise = 0;
541  fclusters.resize(cid);
542  for (size_t y = 0; y < fpointId_to_clusterId.size(); ++y) {
543  if (fpointId_to_clusterId[y] == kNO_CLUSTER) {
544  // This shouldn't happen...all points should be clasified by now!
545  mf::LogWarning("DBscan") << "Unclassified point!";
546  }
547  else if (fpointId_to_clusterId[y] == kNOISE_CLUSTER) {
548  ++noise;
549  }
550  else {
551  unsigned int c = fpointId_to_clusterId[y];
552  if (c >= cid) {
553  mf::LogWarning("DBscan") << "Point in cluster " << c << " when only " << cid
554  << " clusters wer found [0-" << cid - 1 << "]";
555  }
556  fclusters[c].push_back(y);
557  }
558  }
559  mf::LogInfo("DBscan") << "DWM (R*-tree): Found " << cid << " clusters...";
560  for (unsigned int c = 0; c < cid; ++c) {
561  mf::LogVerbatim("DBscan") << "\t"
562  << "Cluster " << c << ":\t" << fclusters[c].size();
563  }
564  mf::LogVerbatim("DBscan") << "\t"
565  << "...and " << noise << " noise points.";
566 }
567 
568 //----------------------------------------------------------------
569 // Find the neighbos of the given point
570 std::set<unsigned int> cluster::DBScanAlg::RegionQuery(unsigned int point)
571 {
572  dbsPoint region(fRect[point]);
573  Visitor visitor = fRTree.Query(AcceptFindNeighbors(region.bounds(),
574  fEps,
575  fEps2,
576  fMaxWidth,
577  fWirePitch[0], //\todo
578  fBadWireSum), // assumes
579  Visitor()); // equal
580  // pitch
581  return visitor.sResult;
582 }
583 //----------------------------------------------------------------
584 // Find the neighbos of the given point
585 std::vector<unsigned int> cluster::DBScanAlg::RegionQuery_vector(unsigned int point)
586 {
587  dbsPoint region(fRect[point]);
588  Visitor visitor = fRTree.Query(AcceptFindNeighbors(region.bounds(),
589  fEps,
590  fEps2,
591  fMaxWidth,
592  fWirePitch[0], //\todo
593  fBadWireSum), // assumes
594  Visitor()); // equal
595  // pitch
596  std::vector<unsigned int>& v = visitor.vResult;
597  // find neighbors insures that the called point is not in the
598  // returned and this is intended as a drop-in replacement, so insure
599  // this condition
600  v.erase(std::remove(v.begin(), v.end(), point), v.end());
601  return v;
602 }
603 
604 //----------------------------------------------------------------
605 // Try to make a new cluster on the basis of point
606 bool cluster::DBScanAlg::ExpandCluster(unsigned int point, unsigned int clusterID)
607 {
608  /* GetSetOfPoints for point*/
609  std::set<unsigned int> seeds = RegionQuery(point);
610 
611  // not enough support -> mark as noise
612  if (seeds.size() < fMinPts) {
613  fpointId_to_clusterId[point] = kNOISE_CLUSTER;
614  return false;
615  }
616  else {
617  // Add to the currecnt cluster
618  fpointId_to_clusterId[point] = clusterID;
619  for (std::set<unsigned int>::iterator itr = seeds.begin(); itr != seeds.end(); itr++) {
620  fpointId_to_clusterId[*itr] = clusterID;
621  }
622  seeds.erase(point);
623  while (!seeds.empty()) {
624  unsigned int currentP = *(seeds.begin());
625  std::set<unsigned int> result = RegionQuery(currentP);
626 
627  if (result.size() >= fMinPts) {
628  for (std::set<unsigned int>::iterator itr = result.begin(); itr != result.end(); itr++) {
629  unsigned int resultP = *itr;
630  // not already assigned to a cluster
631  if (fpointId_to_clusterId[resultP] == kNO_CLUSTER ||
632  fpointId_to_clusterId[resultP] == kNOISE_CLUSTER) {
633  if (fpointId_to_clusterId[resultP] == kNO_CLUSTER) { seeds.insert(resultP); }
634  fpointId_to_clusterId[resultP] = clusterID;
635  } // unclassified or noise
636  } // for
637  } // enough support
638  seeds.erase(currentP);
639  } // while
640  return true;
641  }
642 }
643 
644 //----------------------------------------------------------------
646 // This is the algorithm that finds clusters:
647 //
648 // The original findNeignbor based code converted to use a R*-tree,
649 // but not rearranged
651 {
652 
653  unsigned int cid = 0;
654  // foreach pid
655  for (size_t pid = 0; pid < fps.size(); pid++) {
656  // not already visited
657  if (!fvisited[pid]) {
658 
659  fvisited[pid] = true;
660  // get the neighbors
661  std::vector<unsigned int> ne = RegionQuery_vector(pid);
662 
663  // not enough support -> mark as noise
664  if (ne.size() < fMinPts) { fnoise[pid] = true; }
665  else {
666  // Add p to current cluster
667 
668  std::vector<unsigned int> c; // a new cluster
669 
670  c.push_back(pid); // assign pid to cluster
671  fpointId_to_clusterId[pid] = cid;
672  // go to neighbors
673  for (size_t i = 0; i < ne.size(); ++i) {
674  unsigned int nPid = ne[i];
675 
676  // not already visited
677  if (!fvisited[nPid]) {
678  fvisited[nPid] = true;
679  // go to neighbors
680  std::vector<unsigned int> ne1 = RegionQuery_vector(nPid);
681  // enough support
682  if (ne1.size() >= fMinPts) {
683 
684  // join
685 
686  for (size_t i = 0; i < ne1.size(); ++i) {
687  // join neighbord
688  ne.push_back(ne1[i]);
689  }
690  }
691  }
692 
693  // not already assigned to a cluster
694  if (fpointId_to_clusterId[nPid] == kNO_CLUSTER) {
695  c.push_back(nPid);
696  fpointId_to_clusterId[nPid] = cid;
697  }
698  }
699 
700  fclusters.push_back(c);
701 
702  cid++;
703  }
704  } // if (!visited
705  } // for
706 
707  int noise = 0;
708  // no_hits=fnoise.size();
709 
710  for (size_t y = 0; y < fpointId_to_clusterId.size(); ++y) {
711  if (fpointId_to_clusterId[y] == kNO_CLUSTER) noise++;
712  }
713  mf::LogInfo("DBscan") << "FindNeighbors (R*-tree): Found " << cid << " clusters...";
714  for (unsigned int c = 0; c < cid; ++c) {
715  mf::LogVerbatim("DBscan") << "\t"
716  << "Cluster " << c << ":\t" << fclusters[c].size();
717  }
718  mf::LogVerbatim("DBscan") << "\t"
719  << "...and " << noise << " noise points.";
720 }
721 
722 //----------------------------------------------------------------
724 // This is the algorithm that finds clusters:
725 //
726 // The original findNeighrbor-based code.
728 {
729 
730  unsigned int cid = 0;
731  // foreach pid
732  for (size_t pid = 0; pid < fps.size(); ++pid) {
733  // not already visited
734  if (!fvisited[pid]) {
735 
736  fvisited[pid] = true;
737  // get the neighbors
738  std::vector<unsigned int> ne = findNeighbors(pid, fEps, fEps2);
739 
740  // not enough support -> mark as noise
741  if (ne.size() < fMinPts) { fnoise[pid] = true; }
742  else {
743  // Add p to current cluster
744 
745  std::vector<unsigned int> c; // a new cluster
746 
747  c.push_back(pid); // assign pid to cluster
748  fpointId_to_clusterId[pid] = cid;
749  // go to neighbors
750  for (size_t i = 0; i < ne.size(); ++i) {
751  unsigned int nPid = ne[i];
752 
753  // not already visited
754  if (!fvisited[nPid]) {
755  fvisited[nPid] = true;
756  // go to neighbors
757  std::vector<unsigned int> ne1 = findNeighbors(nPid, fEps, fEps2);
758  // enough support
759  if (ne1.size() >= fMinPts) {
760 
761  // join
762 
763  for (unsigned int i = 0; i < ne1.size(); i++) {
764  // join neighbord
765  ne.push_back(ne1[i]);
766  }
767  }
768  }
769 
770  // not already assigned to a cluster
771  if (fpointId_to_clusterId[nPid] == kNO_CLUSTER) {
772  c.push_back(nPid);
773  fpointId_to_clusterId[nPid] = cid;
774  }
775  }
776 
777  fclusters.push_back(c);
778 
779  cid++;
780  }
781  } // if (!visited
782  } // for
783 
784  int noise = 0;
785 
786  for (size_t y = 0; y < fpointId_to_clusterId.size(); ++y) {
787  if (fpointId_to_clusterId[y] == kNO_CLUSTER) ++noise;
788  }
789  mf::LogInfo("DBscan") << "FindNeighbors (naive): Found " << cid << " clusters...";
790  for (unsigned int c = 0; c < cid; ++c) {
791  mf::LogVerbatim("DBscan") << "\t"
792  << "Cluster " << c << ":\t" << fclusters[c].size() << " points";
793  }
794  mf::LogVerbatim("DBscan") << "\t"
795  << "...and " << noise << " noise points.";
796 }
TRandom r
Definition: spectrum.C:23
intermediate_table::iterator iterator
details::range_type< T > Iterate() const
Initializes the specified ID with the ID of the first cryostat.
Definition: GeometryCore.h:541
MaybeLogger_< ELseverityLevel::ELsev_info, true > LogVerbatim
double getSimilarity(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:347
const unsigned int kNO_CLUSTER
Definition: DBScanAlg.cxx:238
Functions to help with numbers.
Utilities related to art service access.
unsigned int count
Definition: DBScanAlg.cxx:53
void computeSimilarity()
Definition: DBScanAlg.cxx:469
void computeWidthFactor()
Definition: DBScanAlg.cxx:493
MaybeLogger_< ELseverityLevel::ELsev_info, false > LogInfo
bool operator()(const RTree::Leaf *const leaf) const
Definition: DBScanAlg.cxx:234
Declaration of signal hit object.
AcceptFindNeighbors(const BoundingBox &b, double eps, double eps2, double maxWidth, double wireDist, std::vector< unsigned int > &badWireSum)
Definition: DBScanAlg.cxx:137
double Temperature() const
In kelvin.
std::vector< unsigned int > & fBadWireSum
Definition: DBScanAlg.cxx:136
static const BoundingBox EmptyBoundingBox
Definition: DBScanAlg.cxx:108
constexpr auto abs(T v)
Returns the absolute value of the argument.
Cluster finding and building.
void run_dbscan_cluster()
Definition: DBScanAlg.cxx:527
std::set< unsigned int > RegionQuery(unsigned int point)
Definition: DBScanAlg.cxx:570
double dy
Definition: DBScanAlg.h:37
void run_FN_naive_cluster()
Definition: DBScanAlg.cxx:727
bool isNear(const BoundingBox &b) const
Definition: DBScanAlg.cxx:158
unsigned int Nchannels() const
Returns the number of TPC readout channels in the detector.
unsigned int noise()
Definition: chem4.cc:261
double Efield(unsigned int planegap=0) const
kV/cm
bool operator()(const RTree::Leaf *const leaf) const
Definition: DBScanAlg.cxx:90
decltype(auto) constexpr size(T &&obj)
ADL-aware version of std::size.
Definition: StdUtils.h:101
auto vector(Vector const &v)
Returns a manipulator which will print the specified array.
Definition: DumpUtils.h:289
const unsigned int kNOISE_CLUSTER
Definition: DBScanAlg.cxx:239
IDparameter< geo::WireID > WireID
Member type of validated geo::WireID parameter.
double dx
Definition: DBScanAlg.h:37
RTree::BoundingBox BoundingBox
Definition: DBScanAlg.h:33
std::set< unsigned int > sResult
Definition: DBScanAlg.cxx:55
const BoundingBox & fBound
Definition: DBScanAlg.cxx:132
void operator()(const RTree::Leaf *const leaf)
Definition: DBScanAlg.cxx:58
BoundingBox bounds() const
Definition: DBScanAlg.cxx:36
T get(std::string const &key) const
Definition: ParameterSet.h:314
Geometry information for a single wire plane.The plane is represented in the geometry by a solid whic...
Definition: PlaneGeo.h:78
const BoundingBox & m_bound
Definition: DBScanAlg.cxx:72
BoundingBox center(const BoundingBox &b) const
Definition: DBScanAlg.cxx:149
Float_t d
Definition: plot.C:235
bool operator()(const RTree::Node *const node) const
Definition: DBScanAlg.cxx:85
std::vector< unsigned int > vResult
Definition: DBScanAlg.cxx:54
double DriftVelocity(double efield=0., double temperature=0.) const
cm/us
The data type to uniquely identify a TPC.
Definition: geo_types.h:381
DBScanAlg(fhicl::ParameterSet const &pset)
Definition: DBScanAlg.cxx:245
Monte Carlo Simulation.
double getWidthFactor(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:422
bool operator()(const RTree::Node *const node) const
Definition: DBScanAlg.cxx:225
void computeSimilarity2()
Definition: DBScanAlg.cxx:481
std::vector< unsigned int > findNeighbors(unsigned int pid, double threshold, double threshold2)
Definition: DBScanAlg.cxx:451
AcceptEllipse(const BoundingBox &b, double r1, double r2)
Definition: DBScanAlg.cxx:76
std::vector< TrajPoint > seeds
Definition: DataStructs.cxx:14
std::vector< unsigned int > RegionQuery_vector(unsigned int point)
Definition: DBScanAlg.cxx:585
void InitScan(const detinfo::DetectorClocksData &clockData, const detinfo::DetectorPropertiesData &detProp, const std::vector< art::Ptr< recob::Hit >> &allhits, std::set< uint32_t > badChannels, const std::vector< geo::WireID > &wireids=std::vector< geo::WireID >())
Definition: DBScanAlg.cxx:255
bool ExpandCluster(unsigned int point, unsigned int clusterID)
Definition: DBScanAlg.cxx:606
Contains all timing reference information for the detector.
constexpr auto absDiff(A const &a, B const &b)
Returns the absolute value of the difference between two values.
Definition: NumericUtils.h:69
MaybeLogger_< ELseverityLevel::ELsev_warning, false > LogWarning
double x
Definition: DBScanAlg.h:36
Char_t n[5]
double y
Definition: DBScanAlg.h:36
double getSimilarity2(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:383
Float_t e
Definition: plot.C:35
double sampling_rate(DetectorClocksData const &data)
Returns the period of the TPC readout electronics clock.
BoundingBox center() const
Definition: DBScanAlg.cxx:156
art framework interface to geometry description
cet::coded_exception< error, detail::translate > exception
Definition: exception.h:33
const bool ContinueVisiting
Definition: DBScanAlg.cxx:56
BoundingBox nearestPoint(const BoundingBox &b) const
Definition: DBScanAlg.cxx:200