LArSoft  v10_04_05
Liquid Argon Software toolkit - https://larsoft.org/
DBScanAlg.cxx
Go to the documentation of this file.
1 //
3 // \file DBScanAlg.cxx
4 //
5 // kinga.partyka@yale.edu
6 //
7 // This algorithm finds clusters of hits, they can be of arbitrary shape.You need to specify 2(3) parameters:
8 // epsilon, epsilon2 and MinPoints as explained in the corresponding xml file.In my comments a 'point' reference
9 // appears quite often. A 'point' is basically a simple hit which only contains wire and time information. This
10 // algorithm is based on DBSCAN(Density Based Spatial Clustering of Applications with Noise): M. Ester, H.-P. Kriegel,
11 // J. Sander, and X. Xu, A density-based algorithm for discovering clusters in large spatial databases with noise,
12 // Second International Conference on Knowledge Discovery and Data Mining, pp. 226-231, AAAI Press. 1996.
13 // ( Some of this code is from "Antonio Gulli's coding playground")
15 
16 //Framework includes:
18 #include "fhiclcpp/ParameterSet.h"
20 
21 #include "RStarTree/RStarBoundingBox.h"
25 #include "larcorealg/CoreUtils/NumericUtils.h" // util::absDiff()
30 
31 #include <cmath>
32 #include <cstdlib>
33 
34 //----------------------------------------------------------
35 // RStarTree stuff
36 //----------------------------------------------------------
38 {
39  BoundingBox bb;
40  bb.edges[0].first = x - std::abs(dx);
41  bb.edges[0].second = x + std::abs(dx);
42 
43  bb.edges[1].first = y - std::abs(dy);
44  bb.edges[1].second = y + std::abs(dy);
45  return bb;
46 }
47 
48 //----------------------------------------------------------
49 // Set Visitor
50 //
51 // collects accepted leafs in the std::set sResult and the std::vector
52 // vResult
53 struct Visitor {
54  unsigned int count;
55  std::vector<unsigned int> vResult;
56  std::set<unsigned int> sResult;
57  const bool ContinueVisiting;
58  Visitor() : count(0), vResult(), sResult(), ContinueVisiting(true){};
59  void operator()(const RTree::Leaf* const leaf)
60  {
61  vResult.push_back(leaf->leaf);
62  sResult.insert(leaf->leaf);
63  count++;
64  }
65 };
66 
67 //----------------------------------------------------------
68 // Ellipse acceptor
69 //
70 // Roughly quivalent to what FindNeighbors was doing before, except
71 // that it doesn't handle dead wires
72 struct AcceptEllipse {
74  double r[2];
75  double c[2];
76  double d[2];
77  explicit AcceptEllipse(const BoundingBox& b, double r1, double r2) : m_bound(b), r(), c()
78  {
79  r[0] = r1;
80  r[1] = r2;
81  c[0] = (m_bound.edges[0].second + m_bound.edges[0].first) / 2.0;
82  c[1] = (m_bound.edges[1].second + m_bound.edges[1].first) / 2.0;
83  d[0] = (m_bound.edges[0].second - m_bound.edges[0].first) / 2.0;
84  d[1] = (m_bound.edges[1].second - m_bound.edges[1].first) / 2.0;
85  }
86  bool operator()(const RTree::Node* const node) const
87  {
88  // At the node level we use a rectangualr overlap condition
89  return m_bound.overlaps(node->bound);
90  }
91  bool operator()(const RTree::Leaf* const leaf) const
92  {
93  // At the leaf level we have to more careful
94  double C[2], D[2];
95  C[0] = (leaf->bound.edges[0].second + leaf->bound.edges[0].first) / 2.0;
96  C[1] = (leaf->bound.edges[1].second + leaf->bound.edges[1].first) / 2.0;
97  D[0] = (leaf->bound.edges[0].second - leaf->bound.edges[0].first) / 2.0;
98  D[1] = (leaf->bound.edges[1].second - leaf->bound.edges[1].first) / 2.0;
99  double t = 0;
100  for (int i = 0; i < 2; ++i) {
101  // This is only approximate, it will accept a few classes of
102  // non-overlapping ellipses
103  t += ((c[i] - C[i]) * (c[i] - C[i])) / ((d[i] + D[i]) * (d[i] + D[i]));
104  }
105  return (t < 1);
106  }
107 
108 private:
109  static const BoundingBox EmptyBoundingBox; // for uninitialized bounds
110 
111  AcceptEllipse() : m_bound(EmptyBoundingBox), r(), c()
112  {
113  r[0] = r[1] = 1.0;
114  c[0] = (m_bound.edges[0].second - m_bound.edges[0].first) / 2.0;
115  c[1] = (m_bound.edges[1].second - m_bound.edges[1].first) / 2.0;
116  }
117 };
118 
120 
121 //----------------------------------------------------------
122 // FindNeighbors acceptor
123 //
124 // Exactly equivalent to what FindNeighbors was doing before (assuming
125 // that points are preresented with no width in wire-space and with
126 // width in time-space
127 //
128 // We're going to make this work with nodal bounding boxes by
129 // accepting on center-inside the box or comparing to the nearest
130 // point on the edge using the point-to-point comparison function and
131 // a the maximum time-width.
134  double fEps[2];
135  double fMaxWidth;
136  double fWireDist;
137  std::vector<unsigned int>& fBadWireSum;
139  double eps,
140  double eps2,
141  double maxWidth,
142  double wireDist,
143  std::vector<unsigned int>& badWireSum)
144  : fBound(b), fEps(), fMaxWidth(maxWidth), fWireDist(wireDist), fBadWireSum(badWireSum)
145  {
146  fEps[0] = eps;
147  fEps[1] = eps2;
148  }
149  // return a point-like box at the center of b
151  {
152  BoundingBox c;
153  c.edges[0].first = c.edges[0].second = (b.edges[0].first + b.edges[0].second) / 2.0;
154  c.edges[1].first = c.edges[1].second = (b.edges[1].first + b.edges[1].second) / 2.0;
155  return c;
156  }
157  BoundingBox center() const { return center(fBound); }
158  // Here we implement the findNeighbor algorithm from point to point
159  bool isNear(const BoundingBox& b) const
160  {
161  // Precomupation of a few things...box centers, wire bridging
162  // quantities, etc...
163  double bCenter0 = center(b).edges[0].first;
164  double bCenter1 = center(b).edges[1].first;
165  double tCenter0 = center().edges[0].first; // "t" is for test-point
166  double tCenter1 = center().edges[1].first;
167  // widths in the time direction
168  double bWidth = std::abs(b.edges[1].second - b.edges[1].first);
169  double tWidth = std::abs(fBound.edges[1].second - fBound.edges[1].first);
170  // bad channel counting
171  unsigned int wire1 = (unsigned int)(tCenter0 / fWireDist + 0.5);
172  unsigned int wire2 = (unsigned int)(bCenter0 / fWireDist + 0.5);
173  // Clamp the wize number to something resonably.
175  if (wire1 < fBadWireSum.size()) wire1 = fBadWireSum.size();
176  if (wire2 < fBadWireSum.size()) wire2 = fBadWireSum.size();
177  // The getSimilarity[2] wirestobridge calculation is asymmetric,
178  // but is plugged into the cache symmetrically.I am assuming that
179  // this is OK because the wires that are hit cannot be bad.
180  unsigned int wirestobridge = lar::util::absDiff(fBadWireSum[wire1], fBadWireSum[wire2]);
181  double cmtobridge = wirestobridge * fWireDist;
182 
183  double sim = std::abs(tCenter0 - bCenter0) - cmtobridge;
184  sim *= sim; // square it
185 
186  if (std::abs(tCenter0 - bCenter0) > 1e-10) {
187  cmtobridge *= std::abs((tCenter1 - bCenter1) / (tCenter0 - bCenter0));
188  }
189  double sim2 = std::abs(tCenter1 - bCenter1) - cmtobridge;
190  sim2 *= sim2; // square it
191 
192  double k = 0.1;
193  double WFactor = (exp(4.6 * ((tWidth * tWidth) + (bWidth * bWidth)))) * k; // ??
194  // We clamp WFactor on [ 1.0, 6.25 ]
195  if (WFactor < 1.0) WFactor = 1.0;
196  if (WFactor > 6.25) WFactor = 6.25;
197 
198  // Now we implement the test...see FindNeighbors
199  return (((sim) / (fEps[0] * fEps[0])) + ((sim2) / (fEps[1] * fEps[1] * (WFactor))) <= 1);
200  }
202  {
203  BoundingBox n;
204  BoundingBox c = center();
205  for (int i = 0; i < 2; ++i) {
206  // The work for finding the nearest point is the same in both
207  // dimensions
208  if (b.edges[i].first > c.edges[i].second) {
209  // Our point is lower than the low edge of the box
210  n.edges[i].first = n.edges[i].second = b.edges[i].first;
211  }
212  else if (b.edges[0].second < c.edges[0].first) {
213  // Our point is higher than the high edge of the box
214  n.edges[i].first = n.edges[i].second = b.edges[i].second;
215  }
216  else {
217  // In this dimension our point lies within the boxes bounds
218  n.edges[i].first = n.edges[i].second = c.edges[i].first;
219  }
220  }
221  // Now give the time dimension a width
222  n.edges[1].first -= fMaxWidth / 2.0;
223  n.edges[1].second += fMaxWidth / 2.0;
224  return n;
225  }
226  bool operator()(const RTree::Node* const node) const
227  {
228  // if the our point overlaps the bounding box, accept immediately
229  if (fBound.overlaps(node->bound)) return true;
230  // No overlap, so compare to the nearest point on the bounding box
231  // under the assumption that the maximum width applies for that
232  // point
233  return isNear(nearestPoint(node->bound));
234  }
235  bool operator()(const RTree::Leaf* const leaf) const { return isNear(leaf->bound); }
236 };
237 
238 namespace cluster {
239  const unsigned int kNO_CLUSTER = UINT_MAX;
240  const unsigned int kNOISE_CLUSTER = UINT_MAX - 1;
241 }
242 
243 //----------------------------------------------------------
244 // DBScanAlg stuff
245 //----------------------------------------------------------
247 {
248  fEps = p.get<double>("eps");
249  fEps2 = p.get<double>("epstwo");
250  fMinPts = p.get<int>("minPts");
251  fClusterMethod = p.get<int>("Method");
252  fDistanceMetric = p.get<int>("Metric");
253 }
254 
255 //----------------------------------------------------------
257  const detinfo::DetectorPropertiesData& detProp,
258  const std::vector<art::Ptr<recob::Hit>>& allhits,
259  std::set<uint32_t> badChannels,
260  const std::vector<geo::WireID>& wireids)
261 {
262  if (wireids.size() && wireids.size() != allhits.size()) {
263  throw cet::exception("DBScanAlg") << "allhits size = " << allhits.size()
264  << " wireids size = " << wireids.size() << " do not match\n";
265  }
266  // clear all the data member vectors for the new set of hits
267  fps.clear();
268  fpointId_to_clusterId.clear();
269  fnoise.clear();
270  fvisited.clear();
271  fsim.clear();
272  fsim2.clear();
273  fsim3.clear();
274  fclusters.clear();
275  fWirePitch.clear();
276 
277  fBadChannels = badChannels;
278  fBadWireSum.clear();
279 
280  // Clear the RTree
281  fRTree.Remove(RTree::AcceptAny(), RTree::RemoveLeaf());
282  // and the bounds list
283  fRect.clear();
284 
285  //------------------------------------------------------------------
286  // Determine spacing between wires (different for each detector)
287  // get 2 first wires and find their spacing (wire_dist)
288 
289  auto const& wireReadoutGeom = art::ServiceHandle<geo::WireReadout const>()->Get();
291  constexpr geo::TPCID tpcid{0, 0};
292  for (auto const& plane : wireReadoutGeom.Iterate<geo::PlaneGeo>(tpcid))
293  fWirePitch.push_back(plane.WirePitch());
294 
295  // Collect the bad wire list into a useful form
296  if (fClusterMethod) { // Using the R*-tree
297  auto const nchannels = wireReadoutGeom.Nchannels();
298  fBadWireSum.resize(nchannels);
299  unsigned int count = 0;
300  for (unsigned int i = 0; i < fBadWireSum.size(); ++i) {
301  count += fBadChannels.count(i);
302  fBadWireSum[i] = count;
303  }
304  }
305 
306  // Collect the hits in a useful form,
307  // and take note of the maximum time width
308  fMaxWidth = 0.0;
309  for (unsigned int j = 0; j < allhits.size(); ++j) {
310  int dims = 3; //our point is defined by 3 elements:wire#,center of the hit, and the hit width
311  std::vector<double> p(dims);
312 
313  double tickToDist = detProp.DriftVelocity(detProp.Efield(), detProp.Temperature());
314  tickToDist *= 1.e-3 * sampling_rate(clockData); // 1e-3 is conversion of 1/us to 1/ns
315  if (!wireids.size())
316  p[0] = (allhits[j]->WireID().Wire) * fWirePitch[allhits[j]->WireID().Plane];
317  else
318  p[0] = (wireids[j].Wire) * fWirePitch[allhits[j]->WireID().Plane];
319  p[1] = allhits[j]->PeakTime() * tickToDist;
320  p[2] = 2. * allhits[j]->RMS() * tickToDist; //width of a hit in cm
321 
322  // check on the maximum width condition
323  if (p[2] > fMaxWidth) fMaxWidth = p[2];
324 
325  fps.push_back(p);
326 
327  if (fClusterMethod) { // Using the R*-tree
328  // Convert these same values into dbsPoints to feed into the R*-tree
329  dbsPoint pp(p[0], p[1], 0.0, p[2] / 2.0); // note dividing by two
330  fRTree.Insert(j, pp.bounds());
331  // Keep a parallel list already made up. We could use fps instead, but...
332  fRect.push_back(pp);
333  }
334  }
335 
336  fpointId_to_clusterId.resize(fps.size(), kNO_CLUSTER); // Not zero as before!
337  fnoise.resize(fps.size(), false);
338  fvisited.resize(fps.size(), false);
339 
340  if (fClusterMethod) { // Using the R*-tree
341  Visitor visitor = fRTree.Query(RTree::AcceptAny(), Visitor());
342  mf::LogInfo("DBscan") << "InitScan: hits RTree loaded with " << visitor.count << " items.";
343  }
344  mf::LogInfo("DBscan") << "InitScan: hits vector size is " << fps.size();
345 
346  return;
347 }
348 
349 //----------------------------------------------------------
350 double cluster::DBScanAlg::getSimilarity(const std::vector<double> v1, const std::vector<double> v2)
351 {
352 
353  //for Euclidean distance comment everything out except this-->>>
354  // return std::sqrt((v2[1]-v1[1])*(v2[1]-v1[1])+(v2[0]-v1[0])*(v2[0]-v1[0]));
355  //------------------------------------------------------------------------
356  // return std::abs( v2[0]-v1[0]); //for rectangle
357  //----------------------------------------------------------------------
358  //Manhattan distance:
359  //return std::abs(v1[0]-v2[0])+std::abs(v1[1]-v2[1]);
360 
362  double wire_dist = fWirePitch[0];
363 
364  unsigned int wire1 =
365  (unsigned int)(v1[0] / wire_dist + 0.5); //to make sure to get desired integer
366  unsigned int wire2 = (unsigned int)(v2[0] / wire_dist + 0.5);
367  int wirestobridge = 0;
368 
369  if (wire1 > wire2) {
370  unsigned int wire = wire1;
371  wire1 = wire2;
372  wire2 = wire;
373  }
374 
375  for (unsigned int i = wire1; i < wire2; i++) {
376  if (fBadChannels.find(i) != fBadChannels.end()) wirestobridge++;
377  }
378 
379  double cmtobridge = wirestobridge * wire_dist;
380  //---------------------------------------------------------------------
381  return ((std::abs(v2[0] - v1[0]) - cmtobridge) *
382  (std::abs(v2[0] - v1[0]) - cmtobridge)); //for ellipse
383 }
384 
385 //----------------------------------------------------------------
386 double cluster::DBScanAlg::getSimilarity2(const std::vector<double> v1,
387  const std::vector<double> v2)
388 {
389 
390  //-------------------------------------------
391  //return std::abs( v2[1]-v1[1]);//for rectangle
392  //------------------------------------------
393 
395  double wire_dist = fWirePitch[0];
396 
397  unsigned int wire1 =
398  (unsigned int)(v1[0] / wire_dist + 0.5); //to make sure to get desired integer
399  unsigned int wire2 = (unsigned int)(v2[0] / wire_dist + 0.5);
400  int wirestobridge = 0;
401 
402  if (wire1 > wire2) {
403  unsigned int wire = wire1;
404  wire1 = wire2;
405  wire2 = wire;
406  }
407 
408  for (unsigned int i = wire1; i < wire2; i++) {
409  if (fBadChannels.find(i) != fBadChannels.end()) wirestobridge++;
410  }
411 
412  double cmtobridge = wirestobridge * wire_dist;
413 
414  if (std::abs(v2[0] - v1[0]) > 1e-10) {
415  cmtobridge *= std::abs((v2[1] - v1[1]) / (v2[0] - v1[0]));
416  }
417  else
418  cmtobridge = 0;
419 
420  return ((std::abs(v2[1] - v1[1]) - cmtobridge) *
421  (std::abs(v2[1] - v1[1]) - cmtobridge)); //for ellipse
422 }
423 
424 //----------------------------------------------------------------
425 double cluster::DBScanAlg::getWidthFactor(const std::vector<double> v1,
426  const std::vector<double> v2)
427 {
428 
429  //double k=0.13; //this number was determined by looking at flat muon hits' widths.
430  //The average width of these hits in cm is 0.505, so 4*2*(w1^2)=2.04
431  //where w1=w2=0.505, e^2.044= 7.69. In order not to change the distance
432  //in time direction of the ellipse we want to make it equal to 1 for
433  //these hits. Thus the k factor is k=1/7.69=0.13//for coeff=4
434 
435  //..................................................
436  double k = 0.1; //for 4.5 coeff
437  double WFactor = (exp(4.6 * ((v1[2] * v1[2]) + (v2[2] * v2[2])))) * k;
438  //........................................................
439  // Let's try something different:
440  if (WFactor > 1) {
441  if (WFactor < 6.25)
442  return WFactor; //remember that we are increasing the distance in
443  //eps2 as std::sqrt of this number (i.e std::sqrt(6.25))
444  else
445  return 6.25;
446  }
447  else
448  return 1.0;
449 }
450 
451 //----------------------------------------------------------------
452 //\todo this is O(n) in the number of hits, while the high performance
453 // claimed for DBSCAN relies on it being O(log n)!
454 std::vector<unsigned int> cluster::DBScanAlg::findNeighbors(unsigned int pid,
455  double threshold,
456  double threshold2)
457 {
458  std::vector<unsigned int> ne;
459 
460  for (int unsigned j = 0; j < fsim.size(); j++) {
461  if ((pid != j) &&
462  (((fsim[pid][j]) / (threshold * threshold)) +
463  ((fsim2[pid][j]) / (threshold2 * threshold2 * (fsim3[pid][j])))) < 1) { //ellipse
464  ne.push_back(j);
465  }
466  } // end loop over fsim
467 
468  return ne;
469 }
470 
471 //-----------------------------------------------------------------
473 {
474  int size = fps.size();
475  fsim.resize(size, std::vector<double>(size));
476  for (int i = 0; i < size; i++) {
477  for (int j = i + 1; j < size; j++) {
478  fsim[j][i] = fsim[i][j] = getSimilarity(fps[i], fps[j]);
479  }
480  }
481 }
482 
483 //------------------------------------------------------------------
485 {
486  int size = fps.size();
487  fsim2.resize(size, std::vector<double>(size));
488  for (int i = 0; i < size; i++) {
489  for (int j = i + 1; j < size; j++) {
490  fsim2[j][i] = fsim2[i][j] = getSimilarity2(fps[i], fps[j]);
491  }
492  }
493 }
494 
495 //------------------------------------------------------------------
497 {
498  int size = fps.size();
499  fsim3.resize(size, std::vector<double>(size));
500 
501  for (int i = 0; i < size; i++) {
502  for (int j = i + 1; j < size; j++) {
503  fsim3[j][i] = fsim3[i][j] = getWidthFactor(fps[i], fps[j]);
504  }
505  }
506 }
507 
508 //----------------------------------------------------------------
510 // This is the algorithm that finds clusters:
511 // Run the selected clustering algorithm
513 {
514  switch (fClusterMethod) {
515  case 2: return run_dbscan_cluster();
516  case 1: return run_FN_cluster();
517  default:
518  computeSimilarity(); // watch out for this, they are *slow*
519  computeSimilarity2(); // "
520  computeWidthFactor(); // "
521  return run_FN_naive_cluster();
522  }
523 }
524 
525 //----------------------------------------------------------------
527 // This is the algorithm that finds clusters:
528 //
529 // DWM's implementation of DBScanAlg as much like the paper as possible
531 {
532  unsigned int cid = 0;
533  // foreach pid
534  for (size_t pid = 0; pid < fps.size(); pid++) {
535  // not already visited
536  if (fpointId_to_clusterId[pid] == kNO_CLUSTER) {
537  if (ExpandCluster(pid, cid)) { cid++; }
538  } // if (!visited
539  } // for
540  // END DBSCAN
541 
542  // Construct clusters, count noise, etc..
543  int noise = 0;
544  fclusters.resize(cid);
545  for (size_t y = 0; y < fpointId_to_clusterId.size(); ++y) {
546  if (fpointId_to_clusterId[y] == kNO_CLUSTER) {
547  // This shouldn't happen...all points should be clasified by now!
548  mf::LogWarning("DBscan") << "Unclassified point!";
549  }
550  else if (fpointId_to_clusterId[y] == kNOISE_CLUSTER) {
551  ++noise;
552  }
553  else {
554  unsigned int c = fpointId_to_clusterId[y];
555  if (c >= cid) {
556  mf::LogWarning("DBscan") << "Point in cluster " << c << " when only " << cid
557  << " clusters wer found [0-" << cid - 1 << "]";
558  }
559  fclusters[c].push_back(y);
560  }
561  }
562  mf::LogInfo("DBscan") << "DWM (R*-tree): Found " << cid << " clusters...";
563  for (unsigned int c = 0; c < cid; ++c) {
564  mf::LogVerbatim("DBscan") << "\t"
565  << "Cluster " << c << ":\t" << fclusters[c].size();
566  }
567  mf::LogVerbatim("DBscan") << "\t"
568  << "...and " << noise << " noise points.";
569 }
570 
571 //----------------------------------------------------------------
572 // Find the neighbos of the given point
573 std::set<unsigned int> cluster::DBScanAlg::RegionQuery(unsigned int point)
574 {
575  dbsPoint region(fRect[point]);
576  Visitor visitor = fRTree.Query(AcceptFindNeighbors(region.bounds(),
577  fEps,
578  fEps2,
579  fMaxWidth,
580  fWirePitch[0], //\todo
581  fBadWireSum), // assumes
582  Visitor()); // equal
583  // pitch
584  return visitor.sResult;
585 }
586 //----------------------------------------------------------------
587 // Find the neighbos of the given point
588 std::vector<unsigned int> cluster::DBScanAlg::RegionQuery_vector(unsigned int point)
589 {
590  dbsPoint region(fRect[point]);
591  Visitor visitor = fRTree.Query(AcceptFindNeighbors(region.bounds(),
592  fEps,
593  fEps2,
594  fMaxWidth,
595  fWirePitch[0], //\todo
596  fBadWireSum), // assumes
597  Visitor()); // equal
598  // pitch
599  std::vector<unsigned int>& v = visitor.vResult;
600  // find neighbors insures that the called point is not in the
601  // returned and this is intended as a drop-in replacement, so insure
602  // this condition
603  v.erase(std::remove(v.begin(), v.end(), point), v.end());
604  return v;
605 }
606 
607 //----------------------------------------------------------------
608 // Try to make a new cluster on the basis of point
609 bool cluster::DBScanAlg::ExpandCluster(unsigned int point, unsigned int clusterID)
610 {
611  /* GetSetOfPoints for point*/
612  std::set<unsigned int> seeds = RegionQuery(point);
613 
614  // not enough support -> mark as noise
615  if (seeds.size() < fMinPts) {
616  fpointId_to_clusterId[point] = kNOISE_CLUSTER;
617  return false;
618  }
619  else {
620  // Add to the currecnt cluster
621  fpointId_to_clusterId[point] = clusterID;
622  for (std::set<unsigned int>::iterator itr = seeds.begin(); itr != seeds.end(); itr++) {
623  fpointId_to_clusterId[*itr] = clusterID;
624  }
625  seeds.erase(point);
626  while (!seeds.empty()) {
627  unsigned int currentP = *(seeds.begin());
628  std::set<unsigned int> result = RegionQuery(currentP);
629 
630  if (result.size() >= fMinPts) {
631  for (std::set<unsigned int>::iterator itr = result.begin(); itr != result.end(); itr++) {
632  unsigned int resultP = *itr;
633  // not already assigned to a cluster
634  if (fpointId_to_clusterId[resultP] == kNO_CLUSTER ||
635  fpointId_to_clusterId[resultP] == kNOISE_CLUSTER) {
636  if (fpointId_to_clusterId[resultP] == kNO_CLUSTER) { seeds.insert(resultP); }
637  fpointId_to_clusterId[resultP] = clusterID;
638  } // unclassified or noise
639  } // for
640  } // enough support
641  seeds.erase(currentP);
642  } // while
643  return true;
644  }
645 }
646 
647 //----------------------------------------------------------------
649 // This is the algorithm that finds clusters:
650 //
651 // The original findNeignbor based code converted to use a R*-tree,
652 // but not rearranged
654 {
655 
656  unsigned int cid = 0;
657  // foreach pid
658  for (size_t pid = 0; pid < fps.size(); pid++) {
659  // not already visited
660  if (!fvisited[pid]) {
661 
662  fvisited[pid] = true;
663  // get the neighbors
664  std::vector<unsigned int> ne = RegionQuery_vector(pid);
665 
666  // not enough support -> mark as noise
667  if (ne.size() < fMinPts) { fnoise[pid] = true; }
668  else {
669  // Add p to current cluster
670 
671  std::vector<unsigned int> c; // a new cluster
672 
673  c.push_back(pid); // assign pid to cluster
674  fpointId_to_clusterId[pid] = cid;
675  // go to neighbors
676  for (size_t i = 0; i < ne.size(); ++i) {
677  unsigned int nPid = ne[i];
678 
679  // not already visited
680  if (!fvisited[nPid]) {
681  fvisited[nPid] = true;
682  // go to neighbors
683  std::vector<unsigned int> ne1 = RegionQuery_vector(nPid);
684  // enough support
685  if (ne1.size() >= fMinPts) {
686 
687  // join
688 
689  for (size_t i = 0; i < ne1.size(); ++i) {
690  // join neighbord
691  ne.push_back(ne1[i]);
692  }
693  }
694  }
695 
696  // not already assigned to a cluster
697  if (fpointId_to_clusterId[nPid] == kNO_CLUSTER) {
698  c.push_back(nPid);
699  fpointId_to_clusterId[nPid] = cid;
700  }
701  }
702 
703  fclusters.push_back(c);
704 
705  cid++;
706  }
707  } // if (!visited
708  } // for
709 
710  int noise = 0;
711  // no_hits=fnoise.size();
712 
713  for (size_t y = 0; y < fpointId_to_clusterId.size(); ++y) {
714  if (fpointId_to_clusterId[y] == kNO_CLUSTER) noise++;
715  }
716  mf::LogInfo("DBscan") << "FindNeighbors (R*-tree): Found " << cid << " clusters...";
717  for (unsigned int c = 0; c < cid; ++c) {
718  mf::LogVerbatim("DBscan") << "\t"
719  << "Cluster " << c << ":\t" << fclusters[c].size();
720  }
721  mf::LogVerbatim("DBscan") << "\t"
722  << "...and " << noise << " noise points.";
723 }
724 
725 //----------------------------------------------------------------
727 // This is the algorithm that finds clusters:
728 //
729 // The original findNeighrbor-based code.
731 {
732 
733  unsigned int cid = 0;
734  // foreach pid
735  for (size_t pid = 0; pid < fps.size(); ++pid) {
736  // not already visited
737  if (!fvisited[pid]) {
738 
739  fvisited[pid] = true;
740  // get the neighbors
741  std::vector<unsigned int> ne = findNeighbors(pid, fEps, fEps2);
742 
743  // not enough support -> mark as noise
744  if (ne.size() < fMinPts) { fnoise[pid] = true; }
745  else {
746  // Add p to current cluster
747 
748  std::vector<unsigned int> c; // a new cluster
749 
750  c.push_back(pid); // assign pid to cluster
751  fpointId_to_clusterId[pid] = cid;
752  // go to neighbors
753  for (size_t i = 0; i < ne.size(); ++i) {
754  unsigned int nPid = ne[i];
755 
756  // not already visited
757  if (!fvisited[nPid]) {
758  fvisited[nPid] = true;
759  // go to neighbors
760  std::vector<unsigned int> ne1 = findNeighbors(nPid, fEps, fEps2);
761  // enough support
762  if (ne1.size() >= fMinPts) {
763 
764  // join
765 
766  for (unsigned int i = 0; i < ne1.size(); i++) {
767  // join neighbord
768  ne.push_back(ne1[i]);
769  }
770  }
771  }
772 
773  // not already assigned to a cluster
774  if (fpointId_to_clusterId[nPid] == kNO_CLUSTER) {
775  c.push_back(nPid);
776  fpointId_to_clusterId[nPid] = cid;
777  }
778  }
779 
780  fclusters.push_back(c);
781 
782  cid++;
783  }
784  } // if (!visited
785  } // for
786 
787  int noise = 0;
788 
789  for (size_t y = 0; y < fpointId_to_clusterId.size(); ++y) {
790  if (fpointId_to_clusterId[y] == kNO_CLUSTER) ++noise;
791  }
792  mf::LogInfo("DBscan") << "FindNeighbors (naive): Found " << cid << " clusters...";
793  for (unsigned int c = 0; c < cid; ++c) {
794  mf::LogVerbatim("DBscan") << "\t"
795  << "Cluster " << c << ":\t" << fclusters[c].size() << " points";
796  }
797  mf::LogVerbatim("DBscan") << "\t"
798  << "...and " << noise << " noise points.";
799 }
TRandom r
Definition: spectrum.C:23
intermediate_table::iterator iterator
MaybeLogger_< ELseverityLevel::ELsev_info, true > LogVerbatim
double getSimilarity(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:350
const unsigned int kNO_CLUSTER
Definition: DBScanAlg.cxx:239
Functions to help with numbers.
Utilities related to art service access.
unsigned int count
Definition: DBScanAlg.cxx:54
void computeSimilarity()
Definition: DBScanAlg.cxx:472
void computeWidthFactor()
Definition: DBScanAlg.cxx:496
MaybeLogger_< ELseverityLevel::ELsev_info, false > LogInfo
bool operator()(const RTree::Leaf *const leaf) const
Definition: DBScanAlg.cxx:235
Declaration of signal hit object.
AcceptFindNeighbors(const BoundingBox &b, double eps, double eps2, double maxWidth, double wireDist, std::vector< unsigned int > &badWireSum)
Definition: DBScanAlg.cxx:138
double Temperature() const
In kelvin.
std::vector< unsigned int > & fBadWireSum
Definition: DBScanAlg.cxx:137
static const BoundingBox EmptyBoundingBox
Definition: DBScanAlg.cxx:109
constexpr auto abs(T v)
Returns the absolute value of the argument.
cout<< "Opened file "<< fin<< " ixs= "<< ixs<< endl;if(ixs==0) hhh=(TH1F *) fff-> Get("h1")
Definition: AddMC.C:8
Cluster finding and building.
void run_dbscan_cluster()
Definition: DBScanAlg.cxx:530
std::set< unsigned int > RegionQuery(unsigned int point)
Definition: DBScanAlg.cxx:573
double dy
Definition: DBScanAlg.h:37
void run_FN_naive_cluster()
Definition: DBScanAlg.cxx:730
bool isNear(const BoundingBox &b) const
Definition: DBScanAlg.cxx:159
unsigned int noise()
Definition: chem4.cc:261
double Efield(unsigned int planegap=0) const
kV/cm
bool operator()(const RTree::Leaf *const leaf) const
Definition: DBScanAlg.cxx:91
decltype(auto) constexpr size(T &&obj)
ADL-aware version of std::size.
Definition: StdUtils.h:101
auto vector(Vector const &v)
Returns a manipulator which will print the specified array.
Definition: DumpUtils.h:289
const unsigned int kNOISE_CLUSTER
Definition: DBScanAlg.cxx:240
IDparameter< geo::WireID > WireID
Member type of validated geo::WireID parameter.
double dx
Definition: DBScanAlg.h:37
RTree::BoundingBox BoundingBox
Definition: DBScanAlg.h:33
std::set< unsigned int > sResult
Definition: DBScanAlg.cxx:56
const BoundingBox & fBound
Definition: DBScanAlg.cxx:133
void operator()(const RTree::Leaf *const leaf)
Definition: DBScanAlg.cxx:59
BoundingBox bounds() const
Definition: DBScanAlg.cxx:37
T get(std::string const &key) const
Definition: ParameterSet.h:314
Geometry information for a single wire plane.The plane is represented in the geometry by a solid whic...
Definition: PlaneGeo.h:67
const BoundingBox & m_bound
Definition: DBScanAlg.cxx:73
BoundingBox center(const BoundingBox &b) const
Definition: DBScanAlg.cxx:150
Float_t d
Definition: plot.C:235
bool operator()(const RTree::Node *const node) const
Definition: DBScanAlg.cxx:86
std::vector< unsigned int > vResult
Definition: DBScanAlg.cxx:55
double DriftVelocity(double efield=0., double temperature=0.) const
cm/us
The data type to uniquely identify a TPC.
Definition: geo_types.h:306
DBScanAlg(fhicl::ParameterSet const &pset)
Definition: DBScanAlg.cxx:246
Monte Carlo Simulation.
double getWidthFactor(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:425
bool operator()(const RTree::Node *const node) const
Definition: DBScanAlg.cxx:226
void computeSimilarity2()
Definition: DBScanAlg.cxx:484
std::vector< unsigned int > findNeighbors(unsigned int pid, double threshold, double threshold2)
Definition: DBScanAlg.cxx:454
AcceptEllipse(const BoundingBox &b, double r1, double r2)
Definition: DBScanAlg.cxx:77
std::vector< TrajPoint > seeds
Definition: DataStructs.cxx:14
std::vector< unsigned int > RegionQuery_vector(unsigned int point)
Definition: DBScanAlg.cxx:588
void InitScan(const detinfo::DetectorClocksData &clockData, const detinfo::DetectorPropertiesData &detProp, const std::vector< art::Ptr< recob::Hit >> &allhits, std::set< uint32_t > badChannels, const std::vector< geo::WireID > &wireids=std::vector< geo::WireID >())
Definition: DBScanAlg.cxx:256
bool ExpandCluster(unsigned int point, unsigned int clusterID)
Definition: DBScanAlg.cxx:609
Contains all timing reference information for the detector.
constexpr auto absDiff(A const &a, B const &b)
Returns the absolute value of the difference between two values.
Definition: NumericUtils.h:69
MaybeLogger_< ELseverityLevel::ELsev_warning, false > LogWarning
double x
Definition: DBScanAlg.h:36
Char_t n[5]
double y
Definition: DBScanAlg.h:36
double getSimilarity2(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:386
Float_t e
Definition: plot.C:35
double sampling_rate(DetectorClocksData const &data)
Returns the period of the TPC readout electronics clock.
BoundingBox center() const
Definition: DBScanAlg.cxx:157
art framework interface to geometry description
cet::coded_exception< error, detail::translate > exception
Definition: exception.h:33
const bool ContinueVisiting
Definition: DBScanAlg.cxx:57
BoundingBox nearestPoint(const BoundingBox &b) const
Definition: DBScanAlg.cxx:201