LArSoft  v07_13_02
Liquid Argon Software toolkit - http://larsoft.org/
DBScanAlg.cxx
Go to the documentation of this file.
1 //
3 // \file DBScanAlg.cxx
4 //
5 // kinga.partyka@yale.edu
6 //
7 // This algorithm finds clusters of hits, they can be of arbitrary shape.You need to specify 2(3) parameters:
8 // epsilon, epsilon2 and MinPoints as explained in the corresponding xml file.In my comments a 'point' reference
9 // appears quite often. A 'point' is basically a simple hit which only contains wire and time information. This
10 // algorithm is based on DBSCAN(Density Based Spatial Clustering of Applications with Noise): M. Ester, H.-P. Kriegel,
11 // J. Sander, and X. Xu, A density-based algorithm for discovering clusters in large spatial databases with noise,
12 // Second International Conference on Knowledge Discovery and Data Mining, pp. 226-231, AAAI Press. 1996.
13 // ( Some of this code is from "Antonio Gulli's coding playground")
15 
16 
17 //Framework includes:
24 
30 #include "larcorealg/CoreUtils/NumericUtils.h" // util::absDiff()
31 
32 #include <cmath>
33 #include <iostream>
34 #include <fstream>
35 #include <cstdlib>
36 
37 #include "TH1.h"
38 
39 //----------------------------------------------------------
40 // RStarTree stuff
41 //----------------------------------------------------------
42 class dbsPoint{
43 public:
44  double x, y;
45  double dx, dy;
46  dbsPoint(double X=0.0, double Y=0.0, double dX=0.0, double dY=0.0)
47  :x(X), y(Y), dx(dX), dy(dY){};
48  BoundingBox bounds()const;
49  void Expand(double DX, double DY){dx+=DX; dy+=DY;};
50 };
51 
53  BoundingBox bb;
54  bb.edges[0].first = x - std::abs(dx);
55  bb.edges[0].second = x + std::abs(dx);
56 
57  bb.edges[1].first = y - std::abs(dy);
58  bb.edges[1].second = y + std::abs(dy);
59  return bb;
60 }
61 
62 //----------------------------------------------------------
63 // Set Visitor
64 //
65 // collects accepted leafs in the std::set sResult and the std::vector
66 // vResult
67 struct Visitor {
68  unsigned int count;
69  std::vector< unsigned int > vResult;
70  std::set< unsigned int > sResult;
71  const bool ContinueVisiting;
72  Visitor() : count(0), vResult(), sResult(), ContinueVisiting(true) {};
73  void operator()(const RTree::Leaf * const leaf){
74  vResult.push_back(leaf->leaf);
75  sResult.insert(leaf->leaf);
76  count++;
77  }
78 };
79 
80 //----------------------------------------------------------
81 // Ellipse acceptor
82 //
83 // Roughly quivalent to what FindNeighbors was doing before, except
84 // that it doesn't handle dead wires
85 struct AcceptEllipse {
87  double r[2];
88  double c[2];
89  double d[2];
90  explicit AcceptEllipse(const BoundingBox&b,
91  double r1, double r2):m_bound(b),r(),c(){
92  r[0] = r1;
93  r[1] = r2;
94  c[0] = (m_bound.edges[0].second+m_bound.edges[0].first)/2.0;
95  c[1] = (m_bound.edges[1].second+m_bound.edges[1].first)/2.0;
96  d[0] = (m_bound.edges[0].second-m_bound.edges[0].first)/2.0;
97  d[1] = (m_bound.edges[1].second-m_bound.edges[1].first)/2.0;
98  }
99  bool operator()(const RTree::Node * const node) const {
100  // At the node level we use a rectangualr overlap condition
101  return m_bound.overlaps(node->bound);
102  }
103  bool operator()(const RTree::Leaf * const leaf) const {
104  // At the leaf level we have to more careful
105  double C[2], D[2];
106  C[0] = (leaf->bound.edges[0].second + leaf->bound.edges[0].first)/2.0;
107  C[1] = (leaf->bound.edges[1].second + leaf->bound.edges[1].first)/2.0;
108  D[0] = (leaf->bound.edges[0].second - leaf->bound.edges[0].first)/2.0;
109  D[1] = (leaf->bound.edges[1].second - leaf->bound.edges[1].first)/2.0;
110  double t=0;
111  for (int i=0; i<2; ++i){
112  // This is only approximate, it will accept a few classes of
113  // non-overlapping ellipses
114  t += ((c[i]-C[i])*(c[i]-C[i]))/((d[i]+D[i])*(d[i]+D[i]));
115  }
116  return (t < 1);
117  }
118 private:
119  static const BoundingBox EmptyBoundingBox; // for uninitialized bounds
120 
122  :m_bound(EmptyBoundingBox),r(),c(){
123  r[0] = r[1] = 1.0;
124  c[0] = (m_bound.edges[0].second-m_bound.edges[0].first)/2.0;
125  c[1] = (m_bound.edges[1].second-m_bound.edges[1].first)/2.0;
126  }
127 };
128 
130 
131 //----------------------------------------------------------
132 // FindNeighbors acceptor
133 //
134 // Exactly equivalent to what FindNeighbors was doing before (assuming
135 // that points are preresented with no width in wire-space and with
136 // width in time-space
137 //
138 // We're going to make this work with nodal bounding boxes by
139 // accepting on center-inside the box or comparing to the nearest
140 // point on the edge using the point-to-point comparison function and
141 // a the maximum time-width.
144  double fEps[2];
145  double fMaxWidth;
146  double fWireDist;
147  std::vector< unsigned int > &fBadWireSum;
148  AcceptFindNeighbors(const BoundingBox&b, double eps, double eps2,
149  double maxWidth, double wireDist,
150  std::vector< unsigned int > &badWireSum)
151  :fBound(b), fEps(), fMaxWidth(maxWidth), fWireDist(wireDist)
152  ,fBadWireSum(badWireSum)
153  {
154  fEps[0] = eps;
155  fEps[1] = eps2;
156  }
157  // return a point-like box at the center of b
158  BoundingBox center(const BoundingBox&b) const {
159  BoundingBox c;
160  c.edges[0].first = c.edges[0].second = ( b.edges[0].first +
161  b.edges[0].second )/2.0;
162  c.edges[1].first = c.edges[1].second = ( b.edges[1].first +
163  b.edges[1].second )/2.0;
164  return c;
165  }
166  BoundingBox center() const { return center(fBound); }
167  // Here we implement the findNeighbor algorithm from point to point
168  bool isNear(const BoundingBox &b) const {
169  // Precomupation of a few things...box centers, wire bridging
170  // quantities, etc...
171  double bCenter0 = center(b).edges[0].first;
172  double bCenter1 = center(b).edges[1].first;
173  double tCenter0 = center().edges[0].first; // "t" is for test-point
174  double tCenter1 = center().edges[1].first;
175  // widths in the time direction
176  double bWidth = std::abs( b.edges[1].second - b.edges[1].first);
177  double tWidth = std::abs(fBound.edges[1].second - fBound.edges[1].first);
178  // bad channel counting
179  unsigned int wire1 = (unsigned int) (tCenter0/fWireDist + 0.5);
180  unsigned int wire2 = (unsigned int) (bCenter0/fWireDist + 0.5);
181  // Clamp the wize number to something resonably.
183  if (wire1 < fBadWireSum.size()) wire1 = fBadWireSum.size();
184  if (wire2 < fBadWireSum.size()) wire2 = fBadWireSum.size();
185  // The getSimilarity[2] wirestobridge calculation is asymmetric,
186  // but is plugged into the cache symmetrically.I am assuming that
187  // this is OK because the wires that are hit cannot be bad.
188  unsigned int wirestobridge = util::absDiff(fBadWireSum[wire1], fBadWireSum[wire2]);
189  double cmtobridge = wirestobridge*fWireDist;
190 
191  // getSimilarity()
192  double sim = std::abs(tCenter0 - bCenter0) - cmtobridge;
193  sim*=sim; // square it
194 
195  // getSimilarity2()
196  if ( std::abs(tCenter0 - bCenter0) > 1e-10 ) {
197  cmtobridge *= std::abs((tCenter1-bCenter1)/(tCenter0-bCenter0));
198  }
199  double sim2 = std::abs(tCenter1 - bCenter1) - cmtobridge;
200  sim2 *= sim2; // square it
201 
202  // getWidthFactor()
203  // double k=0.13; // see the comments on getWidthFactor
204  // double k=0.78; // as I don't know where these came from
205  // double k=4.5;
206  // double k=1.96;
207  double k=0.1;
208  double WFactor = (exp(4.6*((tWidth*tWidth)+(bWidth*bWidth))))*k; // ??
209  // We clamp WFactor on [ 1.0, 6.25 ]
210  if (WFactor<1.0) WFactor = 1.0;
211  if (WFactor>6.25) WFactor = 6.25;
212 
213  // Now we implement the test...see FindNeighbors
214  return (((sim )/(fEps[0]*fEps[0]) ) +
215  ((sim2)/(fEps[1]*fEps[1]*(WFactor))) <= 1 );
216  }
218  BoundingBox n;
219  BoundingBox c = center();
220  for (int i=0; i<2; ++i) {
221  // The work for finding the nearest point is the same in both
222  // dimensions
223  if ( b.edges[i].first > c.edges[i].second ) {
224  // Our point is lower than the low edge of the box
225  n.edges[i].first = n.edges[i].second = b.edges[i].first;
226  } else if ( b.edges[0].second < c.edges[0].first ) {
227  // Our point is higher than the high edge of the box
228  n.edges[i].first = n.edges[i].second = b.edges[i].second;
229  } else {
230  // In this dimension our point lies within the boxes bounds
231  n.edges[i].first = n.edges[i].second = c.edges[i].first;
232  }
233  }
234  // Now give the time dimension a width
235  n.edges[1].first -= fMaxWidth/2.0;
236  n.edges[1].second += fMaxWidth/2.0;
237  return n;
238  }
239  bool operator()(const RTree::Node * const node) const {
240  // if the our point overlaps the bounding box, accept immediately
241  if (fBound.overlaps(node->bound)) return true;
242  // No overlap, so compare to the nearest point on the bounding box
243  // under the assumption that the maximum width applies for that
244  // point
245  return isNear(nearestPoint(node->bound));
246  }
247  bool operator()(const RTree::Leaf * const leaf) const {
248  return isNear(leaf->bound);
249  }
250 };
251 
252 namespace cluster{
253  const unsigned int kNO_CLUSTER = UINT_MAX;
254  const unsigned int kNOISE_CLUSTER = UINT_MAX-1;
255 }
256 
257 //----------------------------------------------------------
258 // DBScanAlg stuff
259 //----------------------------------------------------------
261 {
262  this->reconfigure(pset);
263 }
264 
265 //----------------------------------------------------------
267 {
268 }
269 
270 //----------------------------------------------------------
272 {
273  fEps = p.get< double >("eps" );
274  fEps2 = p.get< double >("epstwo");
275  fMinPts = p.get< int >("minPts");
276  fClusterMethod = p.get< int >("Method");
277  fDistanceMetric = p.get< int >("Metric");
278 }
279 
280 //----------------------------------------------------------
282  std::set<uint32_t> badChannels,
283  const std::vector<geo::WireID> & wireids)
284 {
285  if (wireids.size()&&wireids.size()!=allhits.size()){
286  throw cet::exception("DBScanAlg") << "allhits size = "<<allhits.size()<<" wireids size = "<<wireids.size()<<" do not match\n";
287  }
288  // clear all the data member vectors for the new set of hits
289  fps.clear();
290  fpointId_to_clusterId.clear();
291  fnoise.clear();
292  fvisited.clear();
293  fsim.clear();
294  fsim2.clear();
295  fsim3.clear();
296  fclusters.clear();
297  fWirePitch.clear();
298 
299  fBadChannels = badChannels;
300  fBadWireSum.clear();
301 
302  // Clear the RTree
303  fRTree.Remove(RTree::AcceptAny(),RTree::RemoveLeaf());
304  // and the bounds list
305  fRect.clear();
306 
307  //------------------------------------------------------------------
308  // Determine spacing between wires (different for each detector)
310 
311  const detinfo::DetectorProperties* detp = lar::providerFrom<detinfo::DetectorPropertiesService>();
313 
314  for(size_t p = 0; p < geom->Nplanes(); ++p)
315  fWirePitch.push_back(geom->WirePitch(p));
316 
317 
318  // Collect the bad wire list into a useful form
319  if (fClusterMethod) { // Using the R*-tree
320  fBadWireSum.resize(geom->Nchannels());
321  unsigned int count=0;
322  for (unsigned int i=0; i<fBadWireSum.size(); ++i) {
323  count += fBadChannels.count(i);
324  fBadWireSum[i] = count;
325  }
326  }
327 
328  // Collect the hits in a useful form,
329  // and take note of the maximum time width
330  fMaxWidth=0.0;
331  for (unsigned int j = 0; j < allhits.size(); ++j){
332  int dims = 3;//our point is defined by 3 elements:wire#,center of the hit, and the hit width
333  std::vector<double> p(dims);
334 
335  double tickToDist = detp->DriftVelocity(detp->Efield(),detp->Temperature());
336  tickToDist *= 1.e-3 * detp->SamplingRate(); // 1e-3 is conversion of 1/us to 1/ns
337  if (!wireids.size()) p[0] = (allhits[j]->WireID().Wire)*fWirePitch[allhits[j]->WireID().Plane];
338  else p[0] = (wireids[j].Wire)*fWirePitch[allhits[j]->WireID().Plane];
339  p[1] = allhits[j]->PeakTime()*tickToDist;
340  p[2] = 2.*allhits[j]->RMS()*tickToDist; //width of a hit in cm
341 
342  // check on the maximum width condition
343  if ( p[2] > fMaxWidth ) fMaxWidth = p[2];
344 
345  fps.push_back(p);
346 
347  if (fClusterMethod) { // Using the R*-tree
348  // Convert these same values into dbsPoints to feed into the R*-tree
349  dbsPoint pp(p[0], p[1], 0.0, p[2]/2.0); // note dividing by two
350  fRTree.Insert(j, pp.bounds());
351  // Keep a parallel list already made up. We could use fps instead, but...
352  fRect.push_back(pp);
353  }
354  }
355 
356  fpointId_to_clusterId.resize(fps.size(), kNO_CLUSTER); // Not zero as before!
357  fnoise.resize(fps.size(), false);
358  fvisited.resize(fps.size(), false);
359 
360  if (fClusterMethod) { // Using the R*-tree
361  Visitor visitor =
362  fRTree.Query(RTree::AcceptAny(),Visitor());
363  mf::LogInfo("DBscan") << "InitScan: hits RTree loaded with "
364  << visitor.count << " items.";
365  }
366  mf::LogInfo("DBscan") << "InitScan: hits vector size is " << fps.size();
367 
368  return;
369 }
370 
371 //----------------------------------------------------------
372 double cluster::DBScanAlg::getSimilarity(const std::vector<double> v1, const std::vector<double> v2){
373 
374 
375  //for Euclidean distance comment everything out except this-->>>
376  // return std::sqrt((v2[1]-v1[1])*(v2[1]-v1[1])+(v2[0]-v1[0])*(v2[0]-v1[0]));
377  //------------------------------------------------------------------------
378  // return std::abs( v2[0]-v1[0]); //for rectangle
379  //----------------------------------------------------------------------
380  //Manhattan distance:
381  //return std::abs(v1[0]-v2[0])+std::abs(v1[1]-v2[1]);
382 
384  double wire_dist = fWirePitch[0];
385 
386  unsigned int wire1=(unsigned int)(v1[0]/wire_dist+0.5); //to make sure to get desired integer
387  unsigned int wire2=(unsigned int)(v2[0]/wire_dist+0.5);
388  int wirestobridge=0;
389 
390  if (wire1>wire2) {
391  unsigned int wire = wire1;
392  wire1 = wire2;
393  wire2 = wire;
394  }
395 
396  for(unsigned int i=wire1;i<wire2;i++){
397  if(fBadChannels.find(i) != fBadChannels.end())
398  wirestobridge++;
399  }
400 
401  double cmtobridge=wirestobridge*wire_dist;
402  //---------------------------------------------------------------------
403  return (( std::abs(v2[0]-v1[0])-cmtobridge)*( std::abs(v2[0]-v1[0])-cmtobridge)); //for ellipse
404 }
405 
406 //----------------------------------------------------------------
407 double cluster::DBScanAlg::getSimilarity2(const std::vector<double> v1, const std::vector<double> v2){
408 
409  //-------------------------------------------
410  //return std::abs( v2[1]-v1[1]);//for rectangle
411  //------------------------------------------
412 
414  double wire_dist = fWirePitch[0];
415 
416  unsigned int wire1=(unsigned int)(v1[0]/wire_dist+0.5); //to make sure to get desired integer
417  unsigned int wire2=(unsigned int)(v2[0]/wire_dist+0.5);
418  int wirestobridge=0;
419 
420  if (wire1>wire2) {
421  unsigned int wire = wire1;
422  wire1 = wire2;
423  wire2 = wire;
424  }
425 
426  for(unsigned int i=wire1;i<wire2;i++){
427  if(fBadChannels.find(i) != fBadChannels.end())
428  wirestobridge++;
429  }
430 
431  double cmtobridge=wirestobridge*wire_dist;
432 
433  if (std::abs(v2[0]-v1[0])>1e-10){
434  cmtobridge *= std::abs((v2[1]-v1[1])/(v2[0]-v1[0]));
435  }
436  else cmtobridge = 0;
437 
438  return (( std::abs(v2[1]-v1[1])-cmtobridge)*( std::abs(v2[1]-v1[1])-cmtobridge));//for ellipse
439 
440 
441 }
442 
443 //----------------------------------------------------------------
444 double cluster::DBScanAlg::getWidthFactor(const std::vector<double> v1, const std::vector<double> v2){
445 
446  //double k=0.13; //this number was determined by looking at flat muon hits' widths.
447  //The average width of these hits in cm is 0.505, so 4*2*(w1^2)=2.04
448  //where w1=w2=0.505, e^2.044= 7.69. In order not to change the distance
449  //in time direction of the ellipse we want to make it equal to 1 for
450  //these hits. Thus the k factor is k=1/7.69=0.13//for coeff=4
451 
452  //double k=0.78;
453  //..................................................
454  double k = 0.1;//for 4.5 coeff
455  double WFactor = (exp(4.6*(( v1[2]*v1[2])+( v2[2]*v2[2]))))*k;
456  //........................................................
457  //Let's try something different:
458  // double k=1.96;
459  // double WFactor=(( v1[2]*v1[2])+( v2[2]*v2[2]))*k;
460  if(WFactor > 1){
461  if(WFactor < 6.25) return WFactor; //remember that we are increasing the distance in
462  //eps2 as std::sqrt of this number (i.e std::sqrt(6.25))
463  else return 6.25;
464 
465  }
466  else return 1.0;
467 }
468 
469 //----------------------------------------------------------------
470 //\todo this is O(n) in the number of hits, while the high performance
471 // claimed for DBSCAN relies on it being O(log n)!
472 std::vector<unsigned int> cluster::DBScanAlg::findNeighbors( unsigned int pid,
473  double threshold,
474  double threshold2) {
475  std::vector<unsigned int> ne;
476 
477  for ( int unsigned j=0; j < fsim.size(); j++){
478  if((pid != j )
479  && (((fsim[pid][j])/ (threshold*threshold))
480  + ((fsim2[pid][j])/ (threshold2*threshold2*(fsim3[pid][j]))))<1){ //ellipse
481  ne.push_back(j);
482  }
483  }// end loop over fsim
484 
485  return ne;
486 }
487 
488 //-----------------------------------------------------------------
490 {
491  int size = fps.size();
492  fsim.resize(size, std::vector<double>(size));
493  for ( int i=0; i < size; i++){
494  for ( int j=i+1; j < size; j++){
495  fsim[j] [i] = fsim[i][ j] = getSimilarity(fps[i], fps[j]);
496  }
497  }
498 }
499 
500 //------------------------------------------------------------------
502 {
503  int size = fps.size();
504  fsim2.resize(size, std::vector<double>(size));
505  for ( int i=0; i < size; i++){
506  for ( int j=i+1; j < size; j++){
507  fsim2[j] [i] = fsim2[i][ j] = getSimilarity2(fps[i], fps[j]);
508  }
509  }
510 }
511 
512 //------------------------------------------------------------------
514 {
515  int size = fps.size();
516  fsim3.resize(size, std::vector<double>(size));
517 
518  for ( int i=0; i < size; i++){
519  for ( int j=i+1; j < size; j++){
520  fsim3[j] [i] = fsim3[i][ j] = getWidthFactor(fps[i], fps[j]);
521  }
522  }
523 }
524 
525 
526 //----------------------------------------------------------------
528 // This is the algorithm that finds clusters:
529 // Run the selected clustering algorithm
531  switch(fClusterMethod) {
532  case 2:
533  return run_dbscan_cluster();
534  case 1:
535  return run_FN_cluster();
536  default:
537  computeSimilarity(); // watch out for this, they are *slow*
538  computeSimilarity2(); // "
539  computeWidthFactor(); // "
540  return run_FN_naive_cluster();
541  }
542 }
543 
544 //----------------------------------------------------------------
546 // This is the algorithm that finds clusters:
547 //
548 // DWM's implementation of DBScanAlg as much like the paper as possible
550  unsigned int cid = 0;
551  // foreach pid
552  for (size_t pid = 0; pid < fps.size(); pid++){
553  // not already visited
554  if (fpointId_to_clusterId[pid] == kNO_CLUSTER) {
555  if ( ExpandCluster(pid,cid) ) {
556  cid++;
557  }
558  } // if (!visited
559  } // for
560  // END DBSCAN
561 
562  // Construct clusters, count noise, etc..
563  int noise = 0;
564  fclusters.resize(cid);
565  for(size_t y = 0; y < fpointId_to_clusterId.size(); ++y){
566  if (fpointId_to_clusterId[y] == kNO_CLUSTER) {
567  // This shouldn't happen...all points should be clasified by now!
568  mf::LogWarning("DBscan") << "Unclassified point!";
569  }
570  else if (fpointId_to_clusterId[y]==kNOISE_CLUSTER) {
571  ++noise;
572  }
573  else {
574  unsigned int c = fpointId_to_clusterId[y];
575  if (c >= cid) {
576  mf::LogWarning("DBscan") << "Point in cluster " << c
577  << " when only " << cid
578  << " clusters wer found [0-" << cid-1
579  << "]";
580  }
581  fclusters[c].push_back(y);
582  }
583  }
584  mf::LogInfo("DBscan") << "DWM (R*-tree): Found "
585  << cid << " clusters...";
586  for (unsigned int c = 0; c < cid; ++c){
587  mf::LogVerbatim("DBscan") << "\t" << "Cluster " << c << ":\t"
588  << fclusters[c].size();
589  }
590  mf::LogVerbatim("DBscan") << "\t" << "...and " << noise << " noise points.";
591 }
592 
593 //----------------------------------------------------------------
594 // Find the neighbos of the given point
595 std::set<unsigned int> cluster::DBScanAlg::RegionQuery(unsigned int point){
596  dbsPoint region(fRect[point]);
597  Visitor visitor =
598 // fRTree.Query(RTree::AcceptOverlapping(region.bounds()),Visitor());
599 // fRTree.Query(AcceptEllipse(region.bounds(),fEps,fEps2),Visitor());
600  fRTree.Query(AcceptFindNeighbors(region.bounds(),
601  fEps,fEps2,
602  fMaxWidth,fWirePitch[0],//\todo
603  fBadWireSum), //assumes
604  Visitor()); //equal
605  //pitch
606  return visitor.sResult;
607 }
608 //----------------------------------------------------------------
609 // Find the neighbos of the given point
610 std::vector<unsigned int> cluster::DBScanAlg::RegionQuery_vector(unsigned int point){
611  dbsPoint region(fRect[point]);
612  Visitor visitor =
613 // fRTree.Query(RTree::AcceptOverlapping(region.bounds()),Visitor());
614 // fRTree.Query(AcceptEllipse(region.bounds(),fEps,fEps2),Visitor());
615  fRTree.Query(AcceptFindNeighbors(region.bounds(),
616  fEps,fEps2,
617  fMaxWidth,fWirePitch[0],//\todo
618  fBadWireSum), //assumes
619  Visitor()); //equal
620  //pitch
621  std::vector<unsigned int> &v = visitor.vResult;
622  // find neighbors insures that the called point is not in the
623  // returned and this is intended as a drop-in replacement, so insure
624  // this condition
625  v.erase(std::remove(v.begin(), v.end(), point),v.end());
626  return v;
627 }
628 
629 
630 //----------------------------------------------------------------
631 // Try to make a new cluster on the basis of point
632 bool cluster::DBScanAlg::ExpandCluster(unsigned int point,
633  unsigned int clusterID)
634 {
635  /* GetSetOfPoints for point*/
636  //std::vector<unsigned int> ne = findNeighbors(point, fEps,fEps2);
637  std::set< unsigned int > seeds = RegionQuery(point);
638 
639  // not enough support -> mark as noise
640  if (seeds.size() < fMinPts){
641  fpointId_to_clusterId[point] = kNOISE_CLUSTER;
642  return false;
643  } else {
644  // Add to the currecnt cluster
645  fpointId_to_clusterId[point]=clusterID;
646  for (std::set<unsigned int>::iterator itr = seeds.begin(); itr != seeds.end(); itr++){
647  fpointId_to_clusterId[*itr]=clusterID;
648  }
649  seeds.erase(point);
650  while (!seeds.empty()) {
651  unsigned int currentP = *(seeds.begin());
652  std::set< unsigned int > result = RegionQuery(currentP);
653 
654  if (result.size() >= fMinPts){
655  for (std::set<unsigned int>::iterator itr = result.begin();
656  itr != result.end();
657  itr++){
658  unsigned int resultP = *itr;
659  // not already assigned to a cluster
660  if (fpointId_to_clusterId[resultP] == kNO_CLUSTER ||
661  fpointId_to_clusterId[resultP] == kNOISE_CLUSTER ){
662  if (fpointId_to_clusterId[resultP] == kNO_CLUSTER ) {
663  seeds.insert(resultP);
664  }
665  fpointId_to_clusterId[resultP]=clusterID;
666  } // unclassified or noise
667  } // for
668  } // enough support
669  seeds.erase(currentP);
670  } // while
671  return true;
672  }
673 }
674 
675 //----------------------------------------------------------------
677 // This is the algorithm that finds clusters:
678 //
679 // The original findNeignbor based code converted to use a R*-tree,
680 // but not rearranged
682 {
683 
684  unsigned int cid = 0;
685  // foreach pid
686  for (size_t pid = 0; pid < fps.size(); pid++){
687  // not already visited
688  if (!fvisited[pid]){
689 
690  fvisited[pid] = true;
691  // get the neighbors
692  //std::vector<unsigned int> ne = findNeighbors(pid, fEps,fEps2);
693  std::vector<unsigned int> ne = RegionQuery_vector(pid);
694 
695  // not enough support -> mark as noise
696  if (ne.size() < fMinPts){
697  fnoise[pid] = true;
698  }
699  else{
700  // Add p to current cluster
701 
702  std::vector<unsigned int> c; // a new cluster
703 
704  c.push_back(pid); // assign pid to cluster
705  fpointId_to_clusterId[pid]=cid;
706  // go to neighbors
707  for (size_t i = 0; i < ne.size(); ++i){
708  unsigned int nPid = ne[i];
709 
710  // not already visited
711  if (!fvisited[nPid]){
712  fvisited[nPid] = true;
713  // go to neighbors
714  //std::vector<unsigned int> ne1 = findNeighbors(nPid, fEps, fEps2);
715  std::vector<unsigned int> ne1 = RegionQuery_vector(nPid);
716  // enough support
717  if (ne1.size() >= fMinPts){
718 
719  // join
720 
721  for(size_t i = 0; i < ne1.size(); ++i){
722  // join neighbord
723  ne.push_back(ne1[i]);
724  }
725  }
726  }
727 
728  // not already assigned to a cluster
729  //if (!fpointId_to_clusterId[nPid]){
730  if (fpointId_to_clusterId[nPid] == kNO_CLUSTER ){
731  c.push_back(nPid);
732  fpointId_to_clusterId[nPid]=cid;
733  }
734  }
735 
736  fclusters.push_back(c);
737 
738 
739  cid++;
740  }
741  } // if (!visited
742  } // for
743 
744 
745  int noise=0;
746  //no_hits=fnoise.size();
747 
748  for(size_t y = 0;y < fpointId_to_clusterId.size(); ++y){
749  //if (fpointId_to_clusterId[y]==0) noise++;
750  if (fpointId_to_clusterId[y]==kNO_CLUSTER) noise++;
751  }
752  mf::LogInfo("DBscan") << "FindNeighbors (R*-tree): Found "
753  << cid << " clusters...";
754  for (unsigned int c = 0; c < cid; ++c){
755  mf::LogVerbatim("DBscan") << "\t" << "Cluster " << c << ":\t"
756  << fclusters[c].size();
757  }
758  mf::LogVerbatim("DBscan") << "\t" << "...and " << noise << " noise points.";
759 
760 }
761 
762 //----------------------------------------------------------------
764 // This is the algorithm that finds clusters:
765 //
766 // The original findNeighrbor-based code.
768 {
769 
770  unsigned int cid = 0;
771  // foreach pid
772  for (size_t pid = 0; pid < fps.size(); ++pid){
773  // not already visited
774  if (!fvisited[pid]){
775 
776  fvisited[pid] = true;
777  // get the neighbors
778  std::vector<unsigned int> ne = findNeighbors(pid, fEps,fEps2);
779 
780  // not enough support -> mark as noise
781  if (ne.size() < fMinPts){
782  fnoise[pid] = true;
783  }
784  else{
785  // Add p to current cluster
786 
787  std::vector<unsigned int> c; // a new cluster
788 
789  c.push_back(pid); // assign pid to cluster
790  fpointId_to_clusterId[pid] = cid;
791  // go to neighbors
792  for (size_t i = 0; i < ne.size(); ++i){
793  unsigned int nPid = ne[i];
794 
795  // not already visited
796  if (!fvisited[nPid]){
797  fvisited[nPid] = true;
798  // go to neighbors
799  std::vector<unsigned int> ne1 = findNeighbors(nPid, fEps, fEps2);
800  // enough support
801  if (ne1.size() >= fMinPts){
802 
803  // join
804 
805  for(unsigned int i=0;i<ne1.size();i++){
806  // join neighbord
807  ne.push_back(ne1[i]);
808  }
809  }
810  }
811 
812  // not already assigned to a cluster
813  //if (!fpointId_to_clusterId[nPid]){
814  if (fpointId_to_clusterId[nPid] == kNO_CLUSTER){
815  c.push_back(nPid);
816  fpointId_to_clusterId[nPid]=cid;
817  }
818  }
819 
820  fclusters.push_back(c);
821 
822 
823  cid++;
824  }
825  } // if (!visited
826  } // for
827 
828 
829  int noise = 0;
830  //no_hits=fnoise.size();
831 
832  for(size_t y = 0; y < fpointId_to_clusterId.size(); ++y){
833  //if (fpointId_to_clusterId[y]==0) noise++;
834  if (fpointId_to_clusterId[y] == kNO_CLUSTER) ++noise;
835  }
836  mf::LogInfo("DBscan") << "FindNeighbors (naive): Found " << cid
837  << " clusters...";
838  for (unsigned int c = 0; c < cid; ++c){
839  mf::LogVerbatim("DBscan") << "\t" << "Cluster " << c << ":\t"
840  << fclusters[c].size() << " points";
841  }
842  mf::LogVerbatim("DBscan") << "\t" << "...and " << noise << " noise points.";
843 
844 }
MaybeLogger_< ELseverityLevel::ELsev_info, true > LogVerbatim
double getSimilarity(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:372
double x
Definition: DBScanAlg.cxx:44
const unsigned int kNO_CLUSTER
Definition: DBScanAlg.cxx:253
Functions to help with numbers.
unsigned int count
Definition: DBScanAlg.cxx:68
void computeSimilarity()
Definition: DBScanAlg.cxx:489
double dx
Definition: DBScanAlg.cxx:45
void InitScan(const std::vector< art::Ptr< recob::Hit > > &allhits, std::set< uint32_t > badChannels, const std::vector< geo::WireID > &wireids=std::vector< geo::WireID >())
Definition: DBScanAlg.cxx:281
void computeWidthFactor()
Definition: DBScanAlg.cxx:513
intermediate_table::iterator iterator
MaybeLogger_< ELseverityLevel::ELsev_info, false > LogInfo
bool operator()(const RTree::Leaf *const leaf) const
Definition: DBScanAlg.cxx:247
Declaration of signal hit object.
Float_t Y
Definition: plot.C:39
AcceptFindNeighbors(const BoundingBox &b, double eps, double eps2, double maxWidth, double wireDist, std::vector< unsigned int > &badWireSum)
Definition: DBScanAlg.cxx:148
static const BoundingBox EmptyBoundingBox
Definition: DBScanAlg.cxx:119
virtual double SamplingRate() const =0
Returns the period of the TPC readout electronics clock.
Cluster finding and building.
void run_dbscan_cluster()
Definition: DBScanAlg.cxx:549
std::set< unsigned int > RegionQuery(unsigned int point)
Definition: DBScanAlg.cxx:595
void run_FN_naive_cluster()
Definition: DBScanAlg.cxx:767
bool isNear(const BoundingBox &b) const
Definition: DBScanAlg.cxx:168
unsigned int noise()
Definition: chem4.cc:265
void Expand(double DX, double DY)
Definition: DBScanAlg.cxx:49
bool operator()(const RTree::Leaf *const leaf) const
Definition: DBScanAlg.cxx:103
LeafType leaf
Definition: RStarTree.h:59
auto vector(Vector const &v)
Returns a manipulator which will print the specified array.
Definition: DumpUtils.h:265
const unsigned int kNOISE_CLUSTER
Definition: DBScanAlg.cxx:254
std::vector< unsigned int > vResult
Definition: DBScanAlg.cxx:69
virtual ~DBScanAlg()
Definition: DBScanAlg.cxx:266
const BoundingBox & fBound
Definition: DBScanAlg.cxx:143
void operator()(const RTree::Leaf *const leaf)
Definition: DBScanAlg.cxx:73
virtual double Temperature() const =0
T get(std::string const &key) const
Definition: ParameterSet.h:231
const BoundingBox & m_bound
Definition: DBScanAlg.cxx:86
BoundingBox center(const BoundingBox &b) const
Definition: DBScanAlg.cxx:158
Float_t d
Definition: plot.C:237
bool operator()(const RTree::Node *const node) const
Definition: DBScanAlg.cxx:99
DBScanAlg(fhicl::ParameterSet const &pset)
Definition: DBScanAlg.cxx:260
Monte Carlo Simulation.
double getWidthFactor(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:444
std::vector< unsigned int > & fBadWireSum
Definition: DBScanAlg.cxx:147
Definition: main.cpp:51
bool operator()(const RTree::Node *const node) const
Definition: DBScanAlg.cxx:239
constexpr auto absDiff(A const &a, B const &b)
Returns the absolute value of the difference between two values.
Definition: NumericUtils.h:43
Encapsulate the geometry of a wire.
void computeSimilarity2()
Definition: DBScanAlg.cxx:501
std::vector< unsigned int > findNeighbors(unsigned int pid, double threshold, double threshold2)
Definition: DBScanAlg.cxx:472
AcceptEllipse(const BoundingBox &b, double r1, double r2)
Definition: DBScanAlg.cxx:90
std::vector< TrajPoint > seeds
Definition: DataStructs.cxx:11
std::vector< unsigned int > RegionQuery_vector(unsigned int point)
Definition: DBScanAlg.cxx:610
bool ExpandCluster(unsigned int point, unsigned int clusterID)
Definition: DBScanAlg.cxx:632
Encapsulate the construction of a single detector plane.
virtual double DriftVelocity(double efield=0., double temperature=0.) const =0
bool overlaps(const RStarBoundingBox< dimensions > &bb) const
void reconfigure(fhicl::ParameterSet const &p)
Definition: DBScanAlg.cxx:271
MaybeLogger_< ELseverityLevel::ELsev_warning, false > LogWarning
int count
Definition: main.cpp:52
std::pair< double, double > edges[dimensions]
BoundingBox bounds() const
Definition: DBScanAlg.cxx:52
dbsPoint(double X=0.0, double Y=0.0, double dX=0.0, double dY=0.0)
Definition: DBScanAlg.cxx:46
double y
Definition: DBScanAlg.cxx:44
Char_t n[5]
double getSimilarity2(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:407
virtual double Efield(unsigned int planegap=0) const =0
Returns the nominal electric field in the specified volume.
Float_t e
Definition: plot.C:34
double dy
Definition: DBScanAlg.cxx:45
Float_t X
Definition: plot.C:39
BoundingBox center() const
Definition: DBScanAlg.cxx:166
std::set< unsigned int > sResult
Definition: DBScanAlg.cxx:70
cet::coded_exception< error, detail::translate > exception
Definition: exception.h:33
const bool ContinueVisiting
Definition: DBScanAlg.cxx:71
BoundingBox nearestPoint(const BoundingBox &b) const
Definition: DBScanAlg.cxx:217