LArSoft  v09_90_00
Liquid Argon Software toolkit - https://larsoft.org/
MemoryTrackerLinux_service.cc
Go to the documentation of this file.
1 // vim: set sw=2 expandtab :
2 // ======================================================================
3 // MemoryTracker
4 //
5 // This MemoryTracker implementation is supported only for Linux
6 // systems. It relies on the proc file system to record VSize and RSS
7 // information throughout the course of an art process. It inserts
8 // memory information into an in-memory SQLite database, or an
9 // external file if the user provides a non-empty file name.
10 //
11 // Since information that procfs provides is process-specific, the
12 // MemoryTracker does not attempt to provide per-module information in
13 // the context of multi-threading. If more than one thread has been
14 // enabled for the art process, only the maximum RSS and VSize for the
15 // process are reported at the end of the job.
16 // ======================================================================
17 
18 #ifndef __linux__
19 #error "This source file can be built only for Linux platforms."
20 #endif
21 
23 #include "art/Framework/Services/Optional/detail/LinuxMallInfo.h"
33 #include "art/Utilities/Globals.h"
34 #include "art/Utilities/LinuxProcData.h"
35 #include "art/Utilities/LinuxProcMgr.h"
38 #include "cetlib/HorizontalRule.h"
39 #include "cetlib/container_algorithms.h"
40 #include "cetlib/sqlite/Connection.h"
41 #include "cetlib/sqlite/Ntuple.h"
42 #include "cetlib/sqlite/select.h"
43 #include "fhiclcpp/types/Atom.h"
47 
48 #include <iomanip>
49 #include <memory>
50 #include <sstream>
51 #include <string>
52 #include <tuple>
53 #include <vector>
54 
55 using namespace std;
56 using namespace string_literals;
57 using namespace cet;
58 
59 using art::detail::LinuxMallInfo;
62 
63 namespace art {
64 
// MemoryTracker: art service that samples process memory through a
// LinuxProcMgr and stores the samples in cet::sqlite::Ntuple tables.
//
// NOTE(review): this listing skips several numbered lines (113, 115, 117,
// 126-127 and 192-195), so some declarations of the class are not visible
// here -- confirm against the upstream source before relying on this text.
65  class MemoryTracker {
    // Row types of the SQLite tables.  The number of column types of each
    // Ntuple equals the size of the corresponding *Columns_ name array
    // declared further down.
66  template <unsigned N>
67  using name_array = cet::sqlite::name_array<N>;
68  using peakUsage_t = cet::sqlite::Ntuple<string, double, string>;
69  using otherInfo_t =
70  cet::sqlite::Ntuple<string, string, string, double, double>;
71  using memEvent_t =
72  cet::sqlite::Ntuple<string, uint32_t, uint32_t, uint32_t, double, double>;
73  using memModule_t = cet::sqlite::Ntuple<string,
74  uint32_t,
75  uint32_t,
76  uint32_t,
77  string,
78  string,
79  string,
80  double,
81  double>;
82  using memEventHeap_t = cet::sqlite::Ntuple<string,
83  uint32_t,
84  uint32_t,
85  uint32_t,
86  int,
87  int,
88  int,
89  int,
90  int,
91  int,
92  int>;
93  using memModuleHeap_t = cet::sqlite::Ntuple<string,
94  uint32_t,
95  uint32_t,
96  uint32_t,
97  string,
98  string,
99  string,
100  int,
101  int,
102  int,
103  int,
104  int,
105  int,
106  int>;
107 
108  public:
    // NOTE(review): presumably tells the framework that a ServiceHandle to
    // this service must not be created -- confirm against the art docs.
109  static constexpr bool service_handle_allowed{false};
110 
    // FHiCL configuration of the service:
    //   dbOutput.filename   - database file name, default ""
    //   dbOutput.overwrite  - default false
    //   includeMallocInfo   - default false
    // NOTE(review): listing lines 113, 115 and 117 are absent; presumably
    // they define the Atom and Table aliases used below -- confirm.
111  struct Config {
112  template <typename T>
114  using Name = fhicl::Name;
116  template <typename T>
118  struct DBoutput {
119  Atom<string> filename{Name{"filename"}, ""};
120  Atom<bool> overwrite{Name{"overwrite"}, false};
121  };
122  Table<DBoutput> dbOutput{Name{"dbOutput"}};
123  Atom<bool> includeMallocInfo{Name{"includeMallocInfo"}, false};
124  };
125 
    // NOTE(review): listing lines 126-127 are absent; presumably the
    // constructor declaration (it is defined out of class) -- confirm.
128 
129  private:
    // Callbacks registered with the ActivityRegistry by the constructor,
    // followed by internal helpers.
130  void prePathProcessing(PathContext const& pc);
131  void recordOtherData(ModuleDescription const& md, string const& step);
132  void recordOtherData(ModuleContext const& mc, string const& step);
133  void recordEventData(Event const& e, string const& step);
134  void recordModuleData(ModuleContext const& mc, string const& step);
135  void postEndJob();
136  bool checkMallocConfig_(string const&, bool);
137  void recordPeakUsages_();
138  void flushTables_();
139  bool using_file_database_() const;
140  void summary_();
141  bool anyTableFull_() const;
142 
    // Source of the memory samples, then the configuration values copied
    // from the ServiceTable in the constructor.
143  LinuxProcMgr procInfo_{};
144  string const fileName_;
145  unique_ptr<cet::sqlite::Connection> const db_;
146  bool const overwriteContents_;
147  bool const includeMallocInfo_;
148 
149  // NB: using "current" semantics for the MemoryTracker is valid
150  // since per-module/event information is retrieved only in a
151  // sequential (i.e. single-threaded) context.
152  EventID currentEventID_{EventID::invalidEvent()};
    // Column names of the tables, in the same order as the column types of
    // the matching Ntuple aliases above.
153  name_array<3u> peakUsageColumns_{{"Name", "Value", "Description"}};
154  name_array<5u> otherInfoColumns_{
155  {"Step", "ModuleLabel", "ModuleType", "Vsize", "RSS"}};
156  name_array<6u> eventColumns_{
157  {"Step", "Run", "SubRun", "Event", "Vsize", "RSS"}};
158  name_array<9u> moduleColumns_{{"Step",
159  "Run",
160  "SubRun",
161  "Event",
162  "Path",
163  "ModuleLabel",
164  "ModuleType",
165  "Vsize",
166  "RSS"}};
167  name_array<11u> eventHeapColumns_{{"Step",
168  "Run",
169  "SubRun",
170  "Event",
171  "arena",
172  "ordblks",
173  "keepcost",
174  "hblkhd",
175  "hblks",
176  "uordblks",
177  "fordblks"}};
178  name_array<14u> moduleHeapColumns_{{"Step",
179  "Run",
180  "SubRun",
181  "Event",
182  "Path",
183  "ModuleLabel",
184  "ModuleType",
185  "arena",
186  "ordblks",
187  "keepcost",
188  "hblkhd",
189  "hblks",
190  "uordblks",
191  "fordblks"}};
    // NOTE(review): listing lines 192-195 are absent; presumably the
    // peakUsageTable_, otherInfoTable_, eventTable_ and moduleTable_ members
    // that the member functions use -- confirm.
    // The heap tables are held by pointer; the member functions test them
    // for null before flushing or querying them.
196  unique_ptr<memEventHeap_t> eventHeapTable_;
197  unique_ptr<memModuleHeap_t> moduleHeapTable_;
198  };
199 
// Constructs the tracker from its FHiCL configuration and registers its
// callbacks with the ActivityRegistry.
//
// postEndJob is always registered.  The per-module and per-event callbacks
// are registered only when a database file name was configured and exactly
// one thread is in use; with more threads a warning is logged instead.
//
// NOTE(review): listing lines 203, 210, 213, 216, 218 and 221 are absent
// from this extract, so parts of the member-initializer list (presumably the
// db_ and otherInfoTable_ initializers and the condition / column arguments
// of the two heap tables) are not visible -- confirm against upstream.
200  MemoryTracker::MemoryTracker(ServiceTable<Config> const& config,
201  ActivityRegistry& iReg)
202  : fileName_{config().dbOutput().filename()}
204  , overwriteContents_{config().dbOutput().overwrite()}
    // checkMallocConfig_ throws when includeMallocInfo is requested without
    // a database file name.
205  , includeMallocInfo_{checkMallocConfig_(config().dbOutput().filename(),
206  config().includeMallocInfo())}
207  // Fix so that a value of 'false' is an error if filename => in-memory db.
208  , peakUsageTable_{*db_, "PeakUsage", peakUsageColumns_, true}
209  // always recompute the peak usage
211  , eventTable_{*db_, "EventInfo", eventColumns_, overwriteContents_}
212  , moduleTable_{*db_, "ModuleInfo", moduleColumns_, overwriteContents_}
214  make_unique<memEventHeap_t>(*db_,
215  "EventMallocInfo",
217  nullptr}
219  make_unique<memModuleHeap_t>(*db_,
220  "ModuleMallocInfo",
222  nullptr}
223  {
    // The end-of-job hook is installed regardless of the configuration.
224  iReg.sPostEndJob.watch(this, &MemoryTracker::postEndJob);
225  auto const nthreads = Globals::instance()->nthreads();
226  if (nthreads != 1) {
227  mf::LogWarning("MemoryTracker")
228  << "Since " << nthreads
229  << " threads have been configured, only process-level\n"
230  "memory usage will be recorded at the end of the job.";
231  }
232 
    // Detailed sampling needs a database file name and a single thread.
233  if (!fileName_.empty() && nthreads == 1u) {
    // Module construction and begin-of-job/run/subrun steps.
234  iReg.sPreModuleConstruction.watch([this](auto const& md) {
235  this->recordOtherData(md, "PreModuleConstruction");
236  });
237  iReg.sPostModuleConstruction.watch([this](auto const& md) {
238  this->recordOtherData(md, "PostModuleConstruction");
239  });
240  iReg.sPreModuleBeginJob.watch(
241  [this](auto const& md) { this->recordOtherData(md, "PreBeginJob"); });
242  iReg.sPostModuleBeginJob.watch(
243  [this](auto const& md) { this->recordOtherData(md, "PostBeginJob"); });
244  iReg.sPreModuleBeginRun.watch(
245  [this](auto const& mc) { this->recordOtherData(mc, "PreBeginRun"); });
246  iReg.sPostModuleBeginRun.watch(
247  [this](auto const& mc) { this->recordOtherData(mc, "PostBeginRun"); });
248  iReg.sPreModuleBeginSubRun.watch([this](auto const& mc) {
249  this->recordOtherData(mc, "PreBeginSubRun");
250  });
251  iReg.sPostModuleBeginSubRun.watch([this](auto const& mc) {
252  this->recordOtherData(mc, "PostBeginSubRun");
253  });
    // Event-level and per-module event-processing steps.
254  iReg.sPreProcessEvent.watch([this](auto const& e, ScheduleContext) {
255  this->recordEventData(e, "PreProcessEvent");
256  });
257  iReg.sPostProcessEvent.watch([this](auto const& e, ScheduleContext) {
258  this->recordEventData(e, "PostProcessEvent");
259  });
260  iReg.sPreModule.watch([this](auto const& mc) {
261  this->recordModuleData(mc, "PreProcessModule");
262  });
263  iReg.sPostModule.watch([this](auto const& mc) {
264  this->recordModuleData(mc, "PostProcessModule");
265  });
266  iReg.sPreWriteEvent.watch([this](auto const& mc) {
267  this->recordModuleData(mc, "PreWriteEvent");
268  });
269  iReg.sPostWriteEvent.watch([this](auto const& mc) {
270  this->recordModuleData(mc, "PostWriteEvent");
271  });
    // End-of-subrun/run/job steps.
272  iReg.sPreModuleEndSubRun.watch(
273  [this](auto const& mc) { this->recordOtherData(mc, "PreEndSubRun"); });
274  iReg.sPreModuleEndRun.watch(
275  [this](auto const& mc) { this->recordOtherData(mc, "PreEndRun"); });
276  iReg.sPreModuleEndJob.watch(
277  [this](auto const& md) { this->recordOtherData(md, "PreEndJob"); });
278  iReg.sPostModuleEndSubRun.watch(
279  [this](auto const& mc) { this->recordOtherData(mc, "PostEndSubRun"); });
280  iReg.sPostModuleEndRun.watch(
281  [this](auto const& mc) { this->recordOtherData(mc, "PostEndRun"); });
282  iReg.sPostModuleEndJob.watch(
283  [this](auto const& md) { this->recordOtherData(md, "PostEndJob"); });
284  }
285  }
286 
// Overload of recordOtherData taking a ModuleContext instead of a
// ModuleDescription.
// NOTE(review): listing line 290 (the only statement of the body) is absent
// from this extract; presumably it forwards to the ModuleDescription
// overload using the context's module description -- confirm upstream.
287  void
288  MemoryTracker::recordOtherData(ModuleContext const& mc, string const& step)
289  {
291  }
292 
293  void
295  string const& step)
296  {
297  auto const data = procInfo_.getCurrentData();
298  otherInfoTable_.insert(step,
299  md.moduleLabel(),
300  md.moduleName(),
301  LinuxProcData::getValueInMB<vsize_t>(data),
302  LinuxProcData::getValueInMB<rss_t>(data));
303  }
304 
// Records memory usage for one event-level step: stores the event's ID in
// currentEventID_, samples the process data from procInfo_ and inserts a row
// into eventTable_ (Vsize and RSS in MB).  When includeMallocInfo_ is set,
// the LinuxMallInfo fields are additionally inserted into eventHeapTable_.
//
//   e    - event being processed
//   step - label of the framework step (e.g. "PreProcessEvent")
305  void
306  MemoryTracker::recordEventData(Event const& e, string const& step)
307  {
308  currentEventID_ = e.id();
309  auto const currentMemory = procInfo_.getCurrentData();
310  eventTable_.insert(step,
    // NOTE(review): listing lines 311-313 are absent from this extract;
    // presumably the run, subrun and event numbers of currentEventID_,
    // matching the "Run", "SubRun" and "Event" columns -- confirm upstream.
314  LinuxProcData::getValueInMB<vsize_t>(currentMemory),
315  LinuxProcData::getValueInMB<rss_t>(currentMemory));
316  if (includeMallocInfo_) {
    // NOTE(review): eventHeapTable_ is dereferenced without a null check;
    // presumably it is non-null whenever includeMallocInfo_ is true -- confirm.
317  auto minfo = LinuxMallInfo{}.get();
318  eventHeapTable_->insert(step,
    // NOTE(review): listing lines 319-321 are absent as well; presumably the
    // same run/subrun/event arguments as above -- confirm upstream.
322  minfo.arena,
323  minfo.ordblks,
324  minfo.keepcost,
325  minfo.hblkhd,
326  minfo.hblks,
327  minfo.uordblks,
328  minfo.fordblks);
329  }
330  }
331 
// Records memory usage for one per-module step: samples the process data
// from procInfo_ and inserts a row with the path name, module label and
// module name into moduleTable_ (Vsize and RSS in MB).  When
// includeMallocInfo_ is set, the LinuxMallInfo fields are additionally
// inserted into moduleHeapTable_.
//
//   mc   - context of the module being run
//   step - label of the framework step (e.g. "PreProcessModule")
332  void
333  MemoryTracker::recordModuleData(ModuleContext const& mc, string const& step)
334  {
335  auto const currentMemory = procInfo_.getCurrentData();
336  moduleTable_.insert(step,
    // NOTE(review): listing lines 337-339 are absent from this extract;
    // presumably the run, subrun and event numbers of currentEventID_,
    // matching the "Run", "SubRun" and "Event" columns -- confirm upstream.
340  mc.pathName(),
341  mc.moduleLabel(),
342  mc.moduleName(),
343  LinuxProcData::getValueInMB<vsize_t>(currentMemory),
344  LinuxProcData::getValueInMB<rss_t>(currentMemory));
345  if (includeMallocInfo_) {
    // NOTE(review): moduleHeapTable_ is dereferenced without a null check;
    // presumably it is non-null whenever includeMallocInfo_ is true -- confirm.
346  auto minfo = LinuxMallInfo{}.get();
347  moduleHeapTable_->insert(step,
    // NOTE(review): listing lines 348-350 are absent as well; presumably the
    // same run/subrun/event arguments as above -- confirm upstream.
351  mc.pathName(),
352  mc.moduleLabel(),
353  mc.moduleName(),
354  minfo.arena,
355  minfo.ordblks,
356  minfo.keepcost,
357  minfo.hblkhd,
358  minfo.hblks,
359  minfo.uordblks,
360  minfo.fordblks);
361  }
362  }
363 
// End-of-job hook: flushes all tables and prints the summary.
// NOTE(review): listing lines 365 and 367 are absent from this extract.
// Line 365 is presumably the `MemoryTracker::postEndJob()` declarator (the
// constructor registers &MemoryTracker::postEndJob with sPostEndJob), and
// line 367 presumably a call that records the peak usages before the
// tables are flushed -- confirm against the upstream source.
364  void
366  {
368  flushTables_();
369  summary_();
370  }
371 
372  bool
373  MemoryTracker::checkMallocConfig_(string const& dbfilename,
374  bool const include)
375  {
376  if (include && dbfilename.empty()) {
377  string const errmsg =
378  "\n'includeMallocInfo : true' is valid only if a nonempty db filename is specified:\n\n"s +
379  " MemoryTracker: {\n"
380  " includeMallocInfo: true\n"
381  " dbOutput: {\n"
382  " filename: \"your_filename.db\"\n"
383  " }\n"
384  " }\n\n";
385  throw Exception{errors::Configuration} << errmsg;
386  }
387  return include;
388  }
389 
390  void
392  {
393  peakUsageTable_.insert(
394  "VmPeak", procInfo_.getVmPeak(), "Peak virtual memory (MB)");
395  peakUsageTable_.insert(
396  "VmHWM", procInfo_.getVmHWM(), "Peak resident set size (MB)");
397  }
398 
399  void
401  {
402  otherInfoTable_.flush();
403  eventTable_.flush();
404  moduleTable_.flush();
405  peakUsageTable_.flush();
406  if (eventHeapTable_) {
407  eventHeapTable_->flush();
408  }
409  if (moduleHeapTable_) {
410  moduleHeapTable_->flush();
411  }
412  }
413 
414  bool
416  {
417  return !fileName_.empty() && fileName_ != ":memory:";
418  }
419 
420  void
422  {
423  using namespace cet::sqlite;
424  using namespace std;
425  query_result<double> rVMax;
426  query_result<double> rRMax;
427  rVMax << select("Value")
428  .from(*db_, peakUsageTable_.name())
429  .where("Name='VmPeak'");
430  rRMax << select("Value")
431  .from(*db_, peakUsageTable_.name())
432  .where("Name='VmHWM'");
433  mf::LogAbsolute log{"MemoryTracker"};
434  HorizontalRule const rule{100};
435  log << '\n' << rule('=') << '\n';
436 
437  if (anyTableFull_() && using_file_database_()) {
438  log << "The SQLite database connected to the MemoryTracker exceeded the "
439  "available resources.\n";
440  log << "No memory usage summary is available.\n";
441  log << "The database at " << fileName_
442  << " will contain an incomplete record of this job's memory usage.\n";
443  } else {
444  log << std::left << "MemoryTracker summary (base-10 MB units used)\n\n";
445  log << " Peak virtual memory usage (VmPeak) : " << unique_value(rVMax)
446  << " MB\n"
447  << " Peak resident set size usage (VmHWM): " << unique_value(rRMax)
448  << " MB\n";
449  if (using_file_database_()) {
450  log << " Details saved in: '" << fileName_ << "'\n";
451  }
452  }
453  log << rule('=');
454  }
455 
456  bool
458  {
459  return peakUsageTable_.full() || otherInfoTable_.full() ||
460  eventTable_.full() || moduleTable_.full() ||
461  (eventHeapTable_ && eventHeapTable_->full()) ||
462  (moduleHeapTable_ && moduleHeapTable_->full());
463  }
464 
465 } // namespace art
466 
name_array< 11u > eventHeapColumns_
auto const & pathName() const
Definition: ModuleContext.h:33
art::LinuxProcData::rss_t rss_t
std::string const & moduleLabel() const
cet::sqlite::name_array< N > name_array
STL namespace.
unique_ptr< memEventHeap_t > eventHeapTable_
ScheduleID::size_type nthreads() const
Definition: Globals.cc:36
RunNumber_t run() const
Definition: EventID.h:98
cet::sqlite::Ntuple< string, uint32_t, uint32_t, uint32_t, int, int, int, int, int, int, int > memEventHeap_t
void recordEventData(Event const &e, string const &step)
std::string const & moduleName() const
auto const & moduleName() const
Definition: ModuleContext.h:48
#define DECLARE_ART_SERVICE(svc, scope)
unique_ptr< cet::sqlite::Connection > const db_
cet::sqlite::Ntuple< string, string, string, double, double > otherInfo_t
cet::sqlite::Ntuple< string, uint32_t, uint32_t, uint32_t, string, string, string, double, double > memModule_t
bool checkMallocConfig_(string const &, bool)
cet::sqlite::Ntuple< string, uint32_t, uint32_t, uint32_t, string, string, string, int, int, int, int, int, int, int > memModuleHeap_t
cet::coded_exception< errors::ErrorCodes, ExceptionDetail::translate > Exception
Definition: Exception.h:66
cet::sqlite::Ntuple< string, double, string > peakUsage_t
#define DEFINE_ART_SERVICE(svc)
constexpr auto const & left(const_AssnsIter< L, R, D, Dir > const &a, const_AssnsIter< L, R, D, Dir > const &b)
Definition: AssnsIter.h:94
art::LinuxProcData::vsize_t vsize_t
auto const & moduleDescription() const
Definition: ModuleContext.h:38
MaybeLogger_< ELseverityLevel::ELsev_warning, false > LogWarning
auto const & moduleLabel() const
Definition: ModuleContext.h:43
Definition: MVAAlg.h:12
EventNumber_t event() const
Definition: EventID.h:116
name_array< 14u > moduleHeapColumns_
void recordModuleData(ModuleContext const &mc, string const &step)
static Globals * instance()
Definition: Globals.cc:17
Float_t e
Definition: plot.C:35
void recordOtherData(ModuleDescription const &md, string const &step)
unique_ptr< memModuleHeap_t > moduleHeapTable_
SubRunNumber_t subRun() const
Definition: EventID.h:110
EventID id() const
Definition: Event.cc:23
cet::sqlite::Ntuple< string, uint32_t, uint32_t, uint32_t, double, double > memEvent_t