LArSoft  v06_85_00
Liquid Argon Software toolkit - http://larsoft.org/
sam_metadata_dumper.cc
Go to the documentation of this file.
1 // sam_metadata_dumper.cc
2 
6 #include "boost/program_options.hpp"
11 #include "cetlib/canonical_string.h"
12 #include "cetlib/container_algorithms.h"
13 #include "fhiclcpp/ParameterSet.h"
15 
16 #include "TError.h"
17 #include "TFile.h"
18 
19 extern "C" {
20 #include "sqlite3.h"
21 }
22 
23 #include <algorithm>
24 #include <cstddef>
25 #include <cstdlib>
26 #include <cstring>
27 #include <iostream>
28 #include <ostream>
29 #include <sstream>
30 #include <string>
31 #include <vector>
32 
33 namespace bpo = boost::program_options;
34 
38 using std::back_inserter;
39 using std::cerr;
40 using std::cout;
41 using std::endl;
42 using std::ostream;
43 using std::string;
44 using std::vector;
45 
// Convenience alias for a list of strings (used for the input file names).
typedef std::vector<std::string> stringvec;

// One row read from the FileCatalog_metadata table.
struct FileCatalogMetadataEntry {
  int SMDid;         // Id of the entry (filled from the table's rowid).
  std::string name;  // Metadata name.
  std::string value; // Metadata value, as stored in the table.
};
52 
53 std::string
54 entryValue(std::string const& value)
55 {
56  std::string result;
57  if (value[0] == '[' || value[0] == '{' ||
58  cet::is_double_quoted_string(value)) {
59  // Assume entry is already a legal JSON representation.
60  result = value;
61  } else {
62  // Attempt to convert to number. If this works, we don't
63  // canonicalize the string. Note that we use the glibc version
64  // because we don't want to have to catch the exception. We could
65  // use streams, but we don't care about the result and dealing with
66  // streams is awkward.
67  char const* entval = value.c_str();
68  char* endptr = const_cast<char*>(entval);
69  strtold(entval, &endptr);
70  if (endptr == entval + value.size()) {
71  // Full conversion: no string canonicalization necessary.
72  result = value;
73  } else {
74  cet::canonical_string(value, result);
75  }
76  }
77  return result;
78 }
79 
80 // Print the human-readable form of a single metadata entry.
81 void
83  size_t idLen,
84  size_t longestName,
85  ostream& output)
86 {
87  const std::string& name = ent.name;
88  constexpr size_t maxIDdigits = 5;
89  constexpr size_t maxNameSpacing = 20;
90 
91  // right-justify SMDid (unless it is more than 5 digits)
92  int id = static_cast<int>(ent.SMDid);
93  size_t maxIDspace = std::min(idLen, maxIDdigits);
94  int nspaces = maxIDspace - 1;
95  for (int i = 0; (nspaces > 0) && (id > 0); ++i) {
96  id /= 10;
97  if (id > 0)
98  --nspaces;
99  }
100  for (int i = 0; i < nspaces; ++i)
101  output << " ";
102  output << ent.SMDid << ": ";
103 
104  output << name;
105 
106  // right-justify value (unless name is more than 20 characters)
107  size_t nameSpacing = maxNameSpacing;
108  if (longestName < maxNameSpacing)
109  nameSpacing = longestName;
110  nspaces = static_cast<int>(nameSpacing - name.size());
111  while (nspaces > 0) {
112  output << " ";
113  --nspaces;
114  }
115 
116  output << " " << entryValue(ent.value) << "\n";
117 }
118 
119 // Print all the entries in the file catalog metadata from a file
120 void
122  vector<FileCatalogMetadataEntry> const& entries,
123  ostream& output,
124  ostream& /*errors*/)
125 {
126  // For nice formatting, determine maximum id length and name size,
127  // so that values can be lined up.
128  int maxID = 1;
129  size_t longestName = 1;
130  for (size_t i = 0; i < entries.size(); ++i) {
131  if (entries[i].SMDid > maxID)
132  maxID = entries[i].SMDid;
133  if (entries[i].name.size() > longestName)
134  longestName = entries[i].name.size();
135  }
136  size_t idLen = 1;
137  for (int i = 0; (i < 5) && (maxID > 0); ++i) {
138  maxID /= 10;
139  if (maxID > 0)
140  ++idLen;
141  }
142  for (auto const& entry : entries) {
143  print_one_fc_metadata_entry_hr(entry, idLen, longestName, output);
144  }
145 }
146 
147 // Print the JSON form of the metadata for the current entry.
148 void
150  ostream& output)
151 {
152  output << cet::canonical_string(ent.name) << ": ";
153 
154  output << entryValue(ent.value);
155 }
156 
157 void
159  vector<FileCatalogMetadataEntry> const& entries,
160  ostream& output,
161  ostream& /*errors*/)
162 {
163  std::ostringstream buf; // Need seekp to work.
164  buf << "{\n";
165  for (auto const& entry : entries) {
166  buf << " "; // Indent.
168  buf << ",\n";
169  }
170  buf.seekp(-2, std::ios_base::cur);
171  buf << "\n }";
172  output << buf.str();
173 }
174 
175 // Read all the file catalog metadata entries stored in the table in 'file'.
176 // Write any error messages to errors.
177 // Return false on failure, and true on success.
178 bool
180  TFile& file,
181  vector<FileCatalogMetadataEntry>& all_metadata_entries,
182  ostream& errors)
183 {
185  // Open the DB
186  art::SQLite3Wrapper sqliteDB{&file, "RootFileDB"};
187  // Read the entries into memory.
188  sqlite3_stmt* stmt = 0;
189  sqlite3_prepare_v2(sqliteDB,
190  "SELECT rowid, Name, Value from FileCatalog_metadata;",
191  -1,
192  &stmt,
193  nullptr);
194  bool row_found = false;
195  int sqlite_status = SQLITE_OK;
196  while ((sqlite_status = sqlite3_step(stmt)) == SQLITE_ROW) {
197  row_found = true;
198  ent.SMDid = sqlite3_column_int(stmt, 0);
199  ent.name =
200  std::string{reinterpret_cast<char const*>(sqlite3_column_text(stmt, 1))};
201  ent.value =
202  std::string{reinterpret_cast<char const*>(sqlite3_column_text(stmt, 2))};
203  all_metadata_entries.push_back(ent);
204  }
205  if (sqlite_status != SQLITE_DONE) {
206  errors << "Unexpected status from table read: " << sqlite3_errmsg(sqliteDB)
207  << " (0x" << sqlite_status << ").\n";
208  }
209  int const finalize_status = sqlite3_finalize(stmt);
210  if (finalize_status != SQLITE_OK) {
211  errors << "Unexpected status from DB status cleanup: "
212  << sqlite3_errmsg(sqliteDB) << " (0x" << finalize_status << ").\n";
213  }
214  if (!row_found) {
215  errors
216  << "No file catalog Metadata rows found - table is missing or empty\n";
217  return false;
218  }
219  return true;
220 }
221 
222 // Extract the file catalog metadata from the given TFile.
223 // The metadata entries are written to the stream output, and
224 // error messages are written to the stream errors.
225 //
226 // Returns 0 to indicate success, and 1 on failure.
227 // Precondition: file.IsZombie() == false
228 
229 // Caution: We pass 'file' by non-const reference because the TFile interface
230 // does not declare the functions we use to be const, even though they do not
231 // modify the underlying file.
232 int
234  ostream& output,
235  ostream& errors,
236  bool want_json)
237 {
238  vector<FileCatalogMetadataEntry> all_metadata_entries;
239  if (!read_all_fc_metadata_entries(file, all_metadata_entries, errors)) {
240  errors << "Unable to to read metadata entries.\n";
241  return 1;
242  }
243  // Iterate through all the entries, printing each one.
244  if (want_json) {
245  std::string const& path = file.GetName();
246  std::string const& baseName = path.substr(path.find_last_of("/") + 1u);
247  output << cet::canonical_string(baseName) << ": ";
248  print_all_fc_metadata_entries_JSON(all_metadata_entries, output, errors);
249  } else { // Human-readable.
250  output << "\nFile catalog metadata from file " << file.GetName() << ":\n\n";
251  print_all_fc_metadata_entries_hr(all_metadata_entries, output, errors);
252  output << "-------------------------------\n";
253  }
254  return 0;
255 }
256 
257 // Extract all the requested metadata tables (for from the named files.
258 // The contents of the tables are written to the stream output, and
259 // error messages are written to the stream errors.
260 //
261 // The return value is the number of files in which errors were
262 // encountered, and is thus 0 to indicate success.
263 int
265  ostream& output,
266  ostream& errors,
267  bool const want_json)
268 {
269  int rc{0};
270  bool first{true};
271  bool printed_opening{false};
272  for (auto const& fn : file_names) {
273  std::unique_ptr<TFile> current_file(TFile::Open(fn.c_str(), "READ"));
274  if (!current_file || current_file->IsZombie()) {
275  ++rc;
276  errors << "Unable to open file '" << fn << "' for reading."
277  << "\nSkipping file.\n";
278  continue;
279  }
280 
281  auto* key_ptr = current_file->GetKey("RootFileDB");
282  if (key_ptr == nullptr) {
283  ++rc;
284  errors << "\nRequested DB, \"RootFileDB\" of type, \"tkeyvfs\", not "
285  "present in file: \""
286  << fn << "\"\n"
287  << "Either this is not an art/ROOT file, it is a corrupt art/ROOT "
288  "file,\n"
289  << "or it is an art/ROOT file produced with a version older than "
290  "v1_00_12.\n";
291  continue;
292  }
293 
294  if (first) {
295  first = false;
296  if (want_json) {
297  output << "{\n ";
298  printed_opening = true;
299  }
300  } else if (want_json) {
301  output << ",\n ";
302  }
303  rc += print_fc_metadata_from_file(*current_file, output, errors, want_json);
304  }
305  if (printed_opening) {
306  output << "\n}\n";
307  }
308  return rc;
309 }
310 
311 void
312 RootErrorHandler(int level, bool die, char const* location, char const* message)
313 {
314  // Ignore dictionary errors.
315  if (level == kWarning && (!die) && strcmp(location, "TClass::TClass") == 0 &&
316  std::string(message).find("no dictionary") != std::string::npos) {
317  return;
318  } else {
319  // Default behavior
320  DefaultErrorHandler(level, die, location, message);
321  }
322 }
323 
324 int
325 main(int argc, char* argv[])
326 {
327  // ------------------
328  // use the boost command line option processing library to help out
329  // with command line options
330  std::ostringstream descstr;
331  descstr << argv[0] << " <options> [<source-file>]+";
332  bpo::options_description desc(descstr.str());
333  desc.add_options()("help,h", "produce help message")(
334  "hr,H", "produce human-readable output (default is JSON)")(
335  "human-readable", "produce human-readable output (default is JSON)")(
336  "source,s", bpo::value<stringvec>(), "source data file (multiple OK)");
337  bpo::options_description all_opts("All Options");
338  all_opts.add(desc);
339  // Each non-option argument is interpreted as the name of a files to
340  // be processed. Any number of filenames is allowed.
341  bpo::positional_options_description pd;
342  pd.add("source", -1);
343  // The variables_map contains the actual program options.
344  bpo::variables_map vm;
345  try {
346  bpo::store(bpo::command_line_parser(argc, argv)
347  .options(all_opts)
348  .positional(pd)
349  .run(),
350  vm);
351  bpo::notify(vm);
352  }
353  catch (bpo::error const& e) {
354  std::cerr << "Exception from command line processing in " << argv[0] << ": "
355  << e.what() << "\n";
356  return 2;
357  }
358  if (vm.count("help")) {
359  std::cout << desc << std::endl;
360  return 1;
361  }
362  bool const want_json =
363  (!vm.count("hr")) && (!vm.count("human-readable")); // Default is JSON.
364 
365  // Get the names of the files we will process.
366  stringvec file_names;
367  size_t const file_count = vm.count("source");
368  if (file_count < 1) {
369  cerr << "One or more input files must be specified;"
370  << " supply filenames as program arguments\n"
371  << "For usage and options list, please do 'sam_metadata_dumper "
372  "--help'.\n";
373  return 3;
374  }
375  file_names.reserve(file_count);
376  cet::copy_all(vm["source"].as<stringvec>(), std::back_inserter(file_names));
377 
378  // Set the ROOT error handler.
379  SetErrorHandler(RootErrorHandler);
380 
381  // Register the tkey VFS with sqlite:
382  tkeyvfs_init();
383 
384  // Do the work.
385  return print_fc_metadata_from_files(file_names, cout, cerr, want_json);
386 }
bool read_all_fc_metadata_entries(TFile &file, vector< FileCatalogMetadataEntry > &all_metadata_entries, ostream &errors)
std::string entryValue(std::string const &value)
void RootErrorHandler(int level, bool die, char const *location, char const *message)
std::vector< std::string > stringvec
auto vector(Vector const &v)
Returns a manipulator which will print the specified array.
Definition: DumpUtils.h:265
void print_one_fc_metadata_entry_hr(FileCatalogMetadataEntry const &ent, size_t idLen, size_t longestName, ostream &output)
int tkeyvfs_init(void)
Definition: tkeyvfs.cc:1768
void print_one_fc_metadata_entry_JSON(FileCatalogMetadataEntry const &ent, ostream &output)
int print_fc_metadata_from_file(TFile &file, ostream &output, ostream &errors, bool want_json)
void print_all_fc_metadata_entries_JSON(vector< FileCatalogMetadataEntry > const &entries, ostream &output, ostream &)
Int_t min
Definition: plot.C:26
TFile * file
vector< string > stringvec
std::map< fhicl::ParameterSetID, ParameterSetBlob > ParameterSetMap
int print_fc_metadata_from_files(stringvec const &file_names, ostream &output, ostream &errors, bool const want_json)
Float_t e
Definition: plot.C:34
void print_all_fc_metadata_entries_hr(vector< FileCatalogMetadataEntry > const &entries, ostream &output, ostream &)
int main(int argc, char *argv[])