LArSoft  v06_85_00
Liquid Argon Software toolkit - http://larsoft.org/
MemoryTrackerLinux_service.cc
Go to the documentation of this file.
1 // ======================================================================
2 //
3 // MemoryTracker
4 //
5 // The MemoryTracker service records VSize and RSS information
6 // throughout the course of an art process. It inserts memory
7 // information into an in-memory SQLite database, or an external file
8 // if the user provides a non-empty file name.
9 //
10 // In the context of multi-threading, the memory information recorded
11 // corresponds to all memory information for the process, and not for
12 // individual threads. A consequence of this is that the recorded
13 // memory usage for a given event may not correspond to memory usage
14 // of that event per se, but can include contributions from other
15 // events that are being processed concurrently.
16 //
17 // In order to have a straightforward interpretation of the
18 // per-event/module memory usage of an art process, then only one
19 // thread should be used. The max VSize and RSS measurements of a job
20 // should be meaningful, however, even in a multi-threaded process.
21 //
22 // ======================================================================
23 
25 #include "art/Framework/Services/Optional/detail/LinuxMallInfo.h"
31 #include "art/Utilities/LinuxProcData.h"
32 #include "art/Utilities/LinuxProcMgr.h"
36 #include "cetlib/HorizontalRule.h"
37 #include "cetlib/container_algorithms.h"
38 #include "cetlib/sqlite/Connection.h"
39 #include "cetlib/sqlite/Ntuple.h"
40 #include "cetlib/sqlite/select.h"
41 #include "fhiclcpp/types/Atom.h"
45 
46 #include <memory>
47 #include <sstream>
48 #include <tuple>
49 
50 namespace art {
51 
52  class MemoryTracker {
53  public:
54  struct Config {
55  using Name = fhicl::Name;
57  struct DBoutput {
59  fhicl::Atom<bool> overwrite{Name{"overwrite"}, false};
60  };
62  fhicl::Atom<bool> includeMallocInfo{Name{"includeMallocInfo"}, false};
63  };
64 
67 
68  private:
69  // Callbacks
70  // ... Path level
71  void prePathProcessing(std::string const&);
72 
73  void recordOtherData(ModuleDescription const& md, std::string const& step);
74  void recordEventData(Event const& e, std::string const& step);
75  void recordModuleData(ModuleDescription const& md, std::string const& step);
76 
77  // ... Wrap up
78  void postEndJob();
79 
80  bool checkMallocConfig_(std::string const&, bool);
81 
82  void recordPeakUsages_();
83  void flushTables_();
84  void summary_();
85 
86  LinuxProcMgr procInfo_;
87  std::string fileName_;
88 
89  // Options
90  cet::sqlite::Connection db_;
93 
94  struct PerScheduleData {
95  std::string pathName{};
96  art::EventID eventID{};
97  };
98  std::vector<PerScheduleData> data_;
99 
100  template <unsigned N>
101  using name_array = cet::sqlite::name_array<N>;
102 
103  name_array<3u> peakUsageColumns_{{"Name", "Value", "Description"}};
105  {"Step", "ModuleLabel", "ModuleType", "Vsize", "RSS"}};
107  {"Step", "Run", "SubRun", "Event", "Vsize", "RSS"}};
109  "Run",
110  "SubRun",
111  "Event",
112  "Path",
113  "ModuleLabel",
114  "ModuleType",
115  "Vsize",
116  "RSS"}};
118  "Run",
119  "SubRun",
120  "Event",
121  "arena",
122  "ordblks",
123  "keepcost",
124  "hblkhd",
125  "hblks",
126  "uordblks",
127  "fordblks"}};
129  "Run",
130  "SubRun",
131  "Event",
132  "Path",
133  "ModuleLabel",
134  "ModuleType",
135  "arena",
136  "ordblks",
137  "keepcost",
138  "hblkhd",
139  "hblks",
140  "uordblks",
141  "fordblks"}};
142 
143  using peakUsage_t = cet::sqlite::Ntuple<std::string, double, std::string>;
144  using otherInfo_t = cet::sqlite::
145  Ntuple<std::string, std::string, std::string, double, double>;
146  using memEvent_t = cet::sqlite::
147  Ntuple<std::string, uint32_t, uint32_t, uint32_t, double, double>;
148  using memModule_t = cet::sqlite::Ntuple<std::string,
149  uint32_t,
150  uint32_t,
151  uint32_t,
152  std::string,
153  std::string,
154  std::string,
155  double,
156  double>;
157  using memEventHeap_t = cet::sqlite::Ntuple<std::string,
158  uint32_t,
159  uint32_t,
160  uint32_t,
161  int,
162  int,
163  int,
164  int,
165  int,
166  int,
167  int>;
168  using memModuleHeap_t = cet::sqlite::Ntuple<std::string,
169  uint32_t,
170  uint32_t,
171  uint32_t,
172  std::string,
173  std::string,
174  std::string,
175  int,
176  int,
177  int,
178  int,
179  int,
180  int,
181  int>;
182 
187  std::unique_ptr<memEventHeap_t> eventHeapTable_;
188  std::unique_ptr<memModuleHeap_t> moduleHeapTable_;
189  }; // MemoryTracker
190 
191 } // art
192 
193 //====================================================
194 // Implementation below
195 
196 using namespace std::string_literals;
197 using namespace cet;
198 
namespace {
  // MT-TODO: Placeholder until we are multi-threaded.
  // Number of schedules; used to size the per-schedule state.
  constexpr unsigned nSchedules{1u};
}
203 
204 //======================================================================================
205 using art::detail::LinuxMallInfo;
208 
210  ActivityRegistry& iReg)
211  : procInfo_{nSchedules}
212  , fileName_{config().dbOutput().filename()}
214  , overwriteContents_{config().dbOutput().overwrite()}
215  // Fix so that a value of 'false' is an error if filename => in-memory db.
216  , includeMallocInfo_{checkMallocConfig_(config().dbOutput().filename(),
217  config().includeMallocInfo())}
218  // tables
219  , peakUsageTable_{db_, "PeakUsage", peakUsageColumns_, true}
220  // always recompute the peak usage
225  std::make_unique<memEventHeap_t>(db_,
226  "EventMallocInfo",
228  nullptr}
230  std::make_unique<memModuleHeap_t>(db_,
231  "ModuleMallocInfo",
233  nullptr}
234 {
235  data_.resize(nSchedules);
236 
237  iReg.sPostEndJob.watch(this, &MemoryTracker::postEndJob);
238 
239  if (!fileName_.empty()) {
240  iReg.sPreModuleConstruction.watch([this](auto const& md) {
241  this->recordOtherData(md, "PreModuleConstruction");
242  });
243  iReg.sPostModuleConstruction.watch([this](auto const& md) {
244  this->recordOtherData(md, "PostModuleConstruction");
245  });
246  iReg.sPreModuleBeginJob.watch(
247  [this](auto const& md) { this->recordOtherData(md, "PreBeginJob"); });
248  iReg.sPostModuleBeginJob.watch(
249  [this](auto const& md) { this->recordOtherData(md, "PostBeginJob"); });
250  iReg.sPreModuleBeginRun.watch(
251  [this](auto const& md) { this->recordOtherData(md, "PreBeginRun"); });
252  iReg.sPostModuleBeginRun.watch(
253  [this](auto const& md) { this->recordOtherData(md, "PostBeginRun"); });
254  iReg.sPreModuleBeginSubRun.watch(
255  [this](auto const& md) { this->recordOtherData(md, "PreBeginSubRun"); });
256  iReg.sPostModuleBeginSubRun.watch(
257  [this](auto const& md) { this->recordOtherData(md, "PostBeginSubRun"); });
258  iReg.sPreProcessPath.watch(this, &MemoryTracker::prePathProcessing);
259  iReg.sPreProcessEvent.watch(
260  [this](auto const& e) { this->recordEventData(e, "PreProcessEvent"); });
261  iReg.sPostProcessEvent.watch(
262  [this](auto const& e) { this->recordEventData(e, "PostProcessEvent"); });
263  iReg.sPreModule.watch([this](auto const& md) {
264  this->recordModuleData(md, "PreProcessModule");
265  });
266  iReg.sPostModule.watch([this](auto const& md) {
267  this->recordModuleData(md, "PostProcessModule");
268  });
269  iReg.sPreWriteEvent.watch(
270  [this](auto const& md) { this->recordModuleData(md, "PreWriteEvent"); });
271  iReg.sPostWriteEvent.watch(
272  [this](auto const& md) { this->recordModuleData(md, "PostWriteEvent"); });
273  iReg.sPreModuleEndSubRun.watch(
274  [this](auto const& md) { this->recordOtherData(md, "PreEndSubRun"); });
275  iReg.sPreModuleEndRun.watch(
276  [this](auto const& md) { this->recordOtherData(md, "PreEndRun"); });
277  iReg.sPreModuleEndJob.watch(
278  [this](auto const& md) { this->recordOtherData(md, "PreEndJob"); });
279  iReg.sPostModuleEndSubRun.watch(
280  [this](auto const& md) { this->recordOtherData(md, "PostEndSubRun"); });
281  iReg.sPostModuleEndRun.watch(
282  [this](auto const& md) { this->recordOtherData(md, "PostEndRun"); });
283  iReg.sPostModuleEndJob.watch(
284  [this](auto const& md) { this->recordOtherData(md, "PostEndJob"); });
285  }
286 }
287 
288 //======================================================================
289 void
290 art::MemoryTracker::prePathProcessing(std::string const& pathname)
291 {
292  // MT-TODO: Placeholder until we're multi-threaded
293  auto const sid = ScheduleID::first().id();
294  data_[sid].pathName = pathname;
295 }
296 
297 //======================================================================
298 void
300  std::string const& step)
301 {
302  // MT-TODO: Placeholder until we're multi-threaded
303  auto const sid = ScheduleID::first().id();
304  auto const data = procInfo_.getCurrentData(sid);
305  otherInfoTable_.insert(step,
306  md.moduleLabel(),
307  md.moduleName(),
308  LinuxProcData::getValueInMB<vsize_t>(data),
309  LinuxProcData::getValueInMB<rss_t>(data));
310 }
311 
312 //======================================================================
313 void
314 art::MemoryTracker::recordEventData(Event const& e, std::string const& step)
315 {
316  // MT-TODO: Placeholder until we're multi-threaded
317  auto const sid = ScheduleID::first().id();
318  auto& d = data_[sid];
319  d.eventID = e.id();
320 
321  auto const currentMemory = procInfo_.getCurrentData(sid);
322 
323  eventTable_.insert(step,
324  d.eventID.run(),
325  d.eventID.subRun(),
326  d.eventID.event(),
327  LinuxProcData::getValueInMB<vsize_t>(currentMemory),
328  LinuxProcData::getValueInMB<rss_t>(currentMemory));
329 
330  if (includeMallocInfo_) {
331  auto minfo = LinuxMallInfo{}.get();
332  eventHeapTable_->insert(step,
333  d.eventID.run(),
334  d.eventID.subRun(),
335  d.eventID.event(),
336  minfo.arena,
337  minfo.ordblks,
338  minfo.keepcost,
339  minfo.hblkhd,
340  minfo.hblks,
341  minfo.uordblks,
342  minfo.fordblks);
343  }
344 }
345 
346 //======================================================================
347 void
349  std::string const& step)
350 {
351  // MT-TODO: Placeholder until we're multi-threaded
352  auto const sid = ScheduleID::first().id();
353  auto& d = data_[sid];
354 
355  auto const currentMemory = procInfo_.getCurrentData(sid);
356 
357  moduleTable_.insert(step,
358  d.eventID.run(),
359  d.eventID.subRun(),
360  d.eventID.event(),
361  d.pathName,
362  md.moduleLabel(),
363  md.moduleName(),
364  LinuxProcData::getValueInMB<vsize_t>(currentMemory),
365  LinuxProcData::getValueInMB<rss_t>(currentMemory));
366 
367  if (includeMallocInfo_) {
368  auto minfo = LinuxMallInfo{}.get();
369  moduleHeapTable_->insert(step,
370  d.eventID.run(),
371  d.eventID.subRun(),
372  d.eventID.event(),
373  d.pathName,
374  md.moduleLabel(),
375  md.moduleName(),
376  minfo.arena,
377  minfo.ordblks,
378  minfo.keepcost,
379  minfo.hblkhd,
380  minfo.hblks,
381  minfo.uordblks,
382  minfo.fordblks);
383  }
384 }
385 
386 //======================================================================
387 void
389 {
391  flushTables_();
392  summary_();
393 }
394 
395 //======================================================================
396 bool
397 art::MemoryTracker::checkMallocConfig_(std::string const& dbfilename,
398  bool const include)
399 {
400  if (include && dbfilename.empty()) {
401  std::string const errmsg =
402  "\n'includeMallocInfo : true' is valid only if a nonempty db filename is specified:\n\n"s +
403  " MemoryTracker: {\n"
404  " includeMallocInfo: true\n"
405  " dbOutput: {\n"
406  " filename: \"your_filename.db\"\n"
407  " }\n"
408  " }\n\n";
409  throw Exception{errors::Configuration} << errmsg;
410  }
411  return include;
412 }
413 
414 //======================================================================
415 void
417 {
418  peakUsageTable_.insert(
419  "VmPeak", procInfo_.getVmPeak(), "Peak virtual memory (MB)");
420  peakUsageTable_.insert(
421  "VmHWM", procInfo_.getVmHWM(), "Peak resident set size (MB)");
422 }
423 
424 //======================================================================
425 void
427 {
428  otherInfoTable_.flush();
429  eventTable_.flush();
430  moduleTable_.flush();
431  peakUsageTable_.flush();
432  if (eventHeapTable_)
433  eventHeapTable_->flush();
434  if (moduleHeapTable_)
435  moduleHeapTable_->flush();
436 }
437 
438 //======================================================================
439 void
441 {
442  using namespace cet::sqlite;
443  using namespace std;
444  query_result<double> rVMax;
445  query_result<double> rRMax;
446  rVMax
447  << select("Value").from(db_, peakUsageTable_.name()).where("Name='VmPeak'");
448  rRMax
449  << select("Value").from(db_, peakUsageTable_.name()).where("Name='VmHWM'");
450 
451  mf::LogAbsolute log{"MemoryTracker"};
452  HorizontalRule const rule{100};
453  log << '\n' << rule('=') << '\n';
454  log << std::left << "MemoryTracker summary (base-10 MB units used)\n\n";
455  log << " Peak virtual memory usage (VmPeak) : " << unique_value(rVMax)
456  << " MB\n"
457  << " Peak resident set size usage (VmHWM): " << unique_value(rRMax)
458  << " MB\n";
459  if (!(fileName_.empty() || fileName_ == ":memory:")) {
460  log << " Details saved in: '" << fileName_ << "'\n";
461  }
462  log << rule('=');
463 }
464 
MemoryTracker(fhicl::ParameterSet const &)
Float_t s
Definition: plot.C:23
void recordOtherData(ModuleDescription const &md, std::string const &step)
name_array< 11u > eventHeapColumns_
T * get() const
Definition: ServiceHandle.h:71
#define DEFINE_ART_SERVICE(svc)
Definition: ServiceMacros.h:93
art::LinuxProcData::rss_t rss_t
cet::sqlite::Ntuple< std::string, uint32_t, uint32_t, uint32_t, std::string, std::string, std::string, int, int, int, int, int, int, int > memModuleHeap_t
cet::sqlite::name_array< N > name_array
constexpr id_type id() const
Definition: ScheduleID.h:70
void prePathProcessing(std::string const &)
STL namespace.
std::vector< PerScheduleData > data_
static ScheduleID first()
Definition: ScheduleID.h:82
#define DECLARE_ART_SERVICE(svc, scope)
Definition: ServiceMacros.h:91
std::unique_ptr< memEventHeap_t > eventHeapTable_
bool checkMallocConfig_(std::string const &, bool)
cet::sqlite::Ntuple< std::string, uint32_t, uint32_t, uint32_t, int, int, int, int, int, int, int > memEventHeap_t
void recordModuleData(ModuleDescription const &md, std::string const &step)
std::string const & moduleName() const
cet::sqlite::Ntuple< std::string, std::string, std::string, double, double > otherInfo_t
std::unique_ptr< memModuleHeap_t > moduleHeapTable_
Float_t d
Definition: plot.C:237
cet::sqlite::Ntuple< std::string, uint32_t, uint32_t, uint32_t, double, double > memEvent_t
std::string const & moduleLabel() const
cet::coded_exception< errors::ErrorCodes, ExceptionDetail::translate > Exception
Definition: Exception.h:66
constexpr auto const & left(const_AssnsIter< L, R, D, Dir > const &a, const_AssnsIter< L, R, D, Dir > const &b)
Definition: AssnsIter.h:104
cet::sqlite::Ntuple< std::string, double, std::string > peakUsage_t
void recordEventData(Event const &e, std::string const &step)
art::LinuxProcData::vsize_t vsize_t
HLT enums.
name_array< 14u > moduleHeapColumns_
cet::sqlite::Ntuple< std::string, uint32_t, uint32_t, uint32_t, std::string, std::string, std::string, double, double > memModule_t
Float_t e
Definition: plot.C:34
EventID id() const
Definition: Event.h:56
cet::sqlite::Connection db_