// LArSoft v09_90_00
// Liquid Argon Software toolkit - https://larsoft.org/
// File: FileCatalogOptionsHandler.cc
// (Source listing taken from the generated documentation of this file.)
#include "art/Framework/Art/detail/FileCatalogOptionsHandler.h"

#include "art/Framework/Art/detail/exists_outside_prolog.h"
#include "art/Framework/Art/detail/fhicl_key.h"
#include "art/Utilities/ensureTable.h"
#include "canvas/Utilities/Exception.h"
#include "cetlib/split.h"
#include "fhiclcpp/extended_value.h"
#include "fhiclcpp/intermediate_table.h"

#include <initializer_list>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>

using art::detail::exists_outside_prolog;
using art::detail::fhicl_key;
using table_t = fhicl::extended_value::table_t;
using string_pair_t = std::pair<std::string const, std::string>;
using std::string;
using std::vector;
22 
23 namespace {
24 
26  split_to_pair(string const& to_split)
27  {
28  vector<string> tmp;
29  tmp.reserve(2);
30  cet::split(to_split, ':', std::back_inserter(tmp));
31  switch (tmp.size()) {
32  case 0:
33  return string_pair_t();
34  case 1:
35  return string_pair_t(string("_default"), std::move(tmp[0]));
36  case 2:
37  return string_pair_t(std::move(tmp[0]), std::move(tmp[1]));
38  default:
40  << "Expected \"key:value\", got multiple \":\".\n";
41  }
42  }
43 
44  void
45  check_metadata_options(bpo::variables_map const& vm)
46  {
47  auto check_for_conflicting_options =
48  [&vm](string const& firstOpt, std::initializer_list<string> opts) {
49  for (auto const& opt : opts) {
50  if (vm.count(opt) + vm.count(firstOpt) > 1)
52  << "The options '--" << opt << "' and '--" << firstOpt
53  << "' are mutually exclusive.";
54  }
55  };
56 
57  check_for_conflicting_options(
58  "sam-file-type", {"sam-inherit-metadata", "sam-inherit-file-type"});
59  check_for_conflicting_options(
60  "sam-run-type", {"sam-inherit-metadata", "sam-inherit-run-type"});
61  }
62 
63  void
64  fill_tiers_streams(bpo::variables_map const& vm,
65  fhicl::intermediate_table& raw_config)
66  {
67  // Precondition: at least one output module defined in the
68  // configuration.
69  auto const& table = raw_config.get<table_t const&>("outputs");
70  string const outputs_stem{"outputs"};
71  string const tier_spec_stem{"dataTier"};
72  string const stream_name_stem{"streamName"};
73  vector<string> data_tiers((vm.count("sam-data-tier") > 0) ?
74  vm["sam-data-tier"].as<vector<string>>() :
75  vector<string>());
76  vector<string> stream_names((vm.count("sam-stream-name") > 0) ?
77  vm["sam-stream-name"].as<vector<string>>() :
78  vector<string>());
79  std::map<string, string> sep_tiers, sep_streams;
80  for (auto const& tier : data_tiers) {
81  sep_tiers.insert(split_to_pair(tier));
82  }
83  for (auto const& stream : stream_names) {
84  sep_streams.insert(split_to_pair(stream));
85  }
86  auto const def_tier_it(sep_tiers.find("_default"));
87  auto const def_tier((def_tier_it != sep_tiers.end()) ? def_tier_it->second :
88  "");
89  auto const def_stream_it(sep_streams.find("_default"));
90  auto const def_stream(
91  (def_stream_it != sep_streams.end()) ? def_stream_it->second : "");
92  for (auto const& output : table) {
94  raw_config, fhicl_key(outputs_stem, output.first, "module_type"))) {
95  continue; // Not a module parameter set.
96  }
97  auto const& tier_spec_key =
98  fhicl_key(outputs_stem, output.first, tier_spec_stem);
99  auto const& stream_name_key =
100  fhicl_key(outputs_stem, output.first, stream_name_stem);
101  auto tiers_it(sep_tiers.find(output.first));
102  string tier;
103  if (tiers_it != sep_tiers.end()) {
104  tier = tiers_it->second;
105  } else if (!exists_outside_prolog(raw_config, tier_spec_key)) {
106  tier = def_tier;
107  }
108  if (!tier.empty()) {
109  raw_config.put(tier_spec_key, tier);
110  }
111  auto streams_it(sep_streams.find(output.first));
112  string stream;
113  if (streams_it != sep_streams.end()) {
114  stream = streams_it->second;
115  } else if (!exists_outside_prolog(raw_config, stream_name_key)) {
116  stream = (!def_stream.empty()) ? def_stream : output.first;
117  }
118  if (!stream.empty()) {
119  raw_config.put(stream_name_key, stream);
120  }
121  if (!(exists_outside_prolog(raw_config, tier_spec_key) &&
122  exists_outside_prolog(raw_config, stream_name_key))) {
124  << "Output \"" << output.first << "\" must be configured with "
125  << tier_spec_stem << " (--sam-data-tier=" << output.first
126  << ":<tier>) and " << stream_name_stem
127  << " (--sam-stream-name=" << output.first << ":<stream>).\n";
128  }
129  }
130  }
131 
132  bool
133  have_outputs(fhicl::intermediate_table& table)
134  {
135  bool result{false};
136  if (exists_outside_prolog(table, "outputs")) {
137  auto const& ev = table.find("outputs");
138  if (ev.is_a(fhicl::TABLE) &&
139  !table.get<fhicl::extended_value::table_t const&>("outputs")
140  .empty()) {
141  result = true;
142  }
143  }
144  return result;
145  }
146 
147  void
148  maybeThrowOnMissingMetadata(fhicl::intermediate_table const& table)
149  {
150  string const key_stem{"services.FileCatalogMetadata."};
151  vector<string> missingItems;
152  if (!exists_outside_prolog(table, key_stem + "applicationFamily")) {
153  missingItems.emplace_back(key_stem +
154  "applicationFamily (--sam-application-family)");
155  }
156  if (!exists_outside_prolog(table, key_stem + "applicationVersion")) {
157  missingItems.emplace_back(
158  key_stem + "applicationVersion (--sam-application-version)");
159  }
160  if (!exists_outside_prolog(table, key_stem + "group")) {
161  missingItems.emplace_back(key_stem + "group (--sam-group)");
162  }
163  if (!missingItems.empty()) {
165  e << "SAM metadata information is required -- missing metadata:\n";
166  for (auto const& s : missingItems) {
167  e << s << '\n';
168  }
169  }
170  }
171 
172 } // namespace
173 
175  bpo::options_description& desc)
176 {
177  bpo::options_description sam_options{"SAM options"};
178  // clang-format off
179  sam_options.add_options()
180  ("sam-web-uri", bpo::value<string>(), "URI for SAM web service.")
181  ("sam-process-id", bpo::value<string>(), "SAM process ID.")
182  ("sam-application-family",
183  bpo::value<string>(&appFamily_), "SAM application family.")
184  ("sam-app-family",
185  bpo::value<string>(&appFamily_), "SAM application family.")
186  ("sam-application-version",
187  bpo::value<string>(&appVersion_), "SAM application version.")
188  ("sam-app-version",
189  bpo::value<string>(&appVersion_), "SAM application version.")
190  ("sam-group", bpo::value<string>(), "SAM group.")
191  ("sam-file-type", bpo::value<string>(), "File type for SAM metadata.")
192  ("sam-data-tier",
193  bpo::value<vector<string>>(),
194  "SAM data tier (<spec-label>:<tier-spec>).")
195  ("sam-run-type", bpo::value<string>(), "Global run-type for SAM metadata.")
196  ("sam-stream-name",
197  bpo::value<vector<string>>(),
198  "SAM stream name (<module-label>:<stream-name>).")
199  ("sam-inherit-metadata", "Input file provides the file type and run type.")
200  ("sam-inherit-file-type", "Input file provides the file type.")
201  ("sam-inherit-run-type", "Input file provides the run type.");
202  // clang-format on
203  desc.add(sam_options);
204 }
205 
206 int
208 {
209  // Checks can't be done until after post-processing.
210  return 0;
211 }
212 
213 int
215  bpo::variables_map const& vm,
216  fhicl::intermediate_table& raw_config)
217 {
218  std::string const services{"services"};
219  auto const& ciLocation = fhicl_key(services, "CatalogInterface");
220  auto const& ftLocation = fhicl_key(services, "FileTransfer");
221  auto const& fcmdLocation = fhicl_key(services, "FileCatalogMetadata");
222 
224  // Load up the configuration with command-line options.
225  //
226  // sam-web-uri and sam-process-id.
227  if (vm.count("sam-web-uri") > 0) {
228  raw_config.put(fhicl_key(ciLocation, "webURI"),
229  vm["sam-web-uri"].as<string>());
230  }
231  if (vm.count("sam-process-id") > 0) {
232  // Sequence.
233  raw_config.put("source.fileNames",
234  vector<string>{vm["sam-process-id"].as<string>()});
235  // Atom.
236  raw_config.put(fhicl_key(fcmdLocation, "processID"),
237  vm["sam-process-id"].as<string>());
238  }
239  if (exists_outside_prolog(raw_config, fhicl_key(ciLocation, "webURI")) !=
241  raw_config, fhicl_key(fcmdLocation, "processID"))) { // Inconsistent.
243  << "configurations " << fhicl_key(ciLocation, "webURI")
244  << " (--sam-web-uri) and\n"
245  << fhicl_key(fcmdLocation, "processID")
246  << " (--sam-process-id) must be specified\n"
247  << "together or not at all.\n";
248  }
249  bool const wantSAMweb{
250  exists_outside_prolog(raw_config, fhicl_key(ciLocation, "webURI")) &&
251  exists_outside_prolog(raw_config, "source.fileNames")};
252  // Other metadata items.
253  if (!appFamily_.empty()) {
254  raw_config.put(fhicl_key(fcmdLocation, "applicationFamily"), appFamily_);
255  }
256  if (vm.count("sam-group") > 0) {
257  raw_config.put(fhicl_key(fcmdLocation, "group"),
258  vm["sam-group"].as<string>());
259  }
260  if (!appVersion_.empty()) {
261  raw_config.put(fhicl_key(fcmdLocation, "applicationVersion"), appVersion_);
262  }
263 
264  check_metadata_options(vm);
265 
266  string const mdFromInput{"metadataFromInput"};
267  bool specifyDataTier{false}; // The output module needs a
268  // 'dataTier' if "fileType" is
269  // provided either by the input file
270  // or as a configuration parameter.
271  if (vm.count("sam-inherit-metadata") > 0) {
272  raw_config.put(fhicl_key(fcmdLocation, mdFromInput),
273  vector<string>{"fileType", "runType"});
274  specifyDataTier = true;
275  raw_config.erase(fhicl_key(fcmdLocation, "fileType"));
276  raw_config.erase(fhicl_key(fcmdLocation, "runType"));
277  } else {
278  vector<string> md;
279  if (vm.count("sam-inherit-file-type") > 0) {
280  md.emplace_back("file_type");
281  specifyDataTier = true;
282  raw_config.erase(fhicl_key(fcmdLocation, "fileType"));
283  }
284  if (vm.count("sam-inherit-run-type") > 0) {
285  // 'run_type' is not supported by SAM as a top-level field; we
286  // thus preface it with 'art.'
287  md.emplace_back("art.run_type");
288  raw_config.erase(fhicl_key(fcmdLocation, "runType"));
289  }
290  if (!md.empty()) {
291  raw_config.put(fhicl_key(fcmdLocation, mdFromInput), md);
292  }
293  }
294 
295  if (vm.count("sam-run-type") > 0) {
296  raw_config.put(fhicl_key(fcmdLocation, "runType"),
297  vm["sam-run-type"].as<string>());
298  }
299  if (vm.count("sam-file-type") > 0) {
300  raw_config.put(fhicl_key(fcmdLocation, "fileType"),
301  vm["sam-file-type"].as<string>());
302  }
303  bool const requireMetadata =
304  have_outputs(raw_config) &&
305  (wantSAMweb ||
306  exists_outside_prolog(raw_config,
307  fhicl_key(fcmdLocation, "applicationFamily")) ||
308  exists_outside_prolog(raw_config,
309  fhicl_key(fcmdLocation, "applicationVersion")) ||
310  exists_outside_prolog(raw_config, fhicl_key(fcmdLocation, "group")) ||
311  exists_outside_prolog(raw_config, fhicl_key(fcmdLocation, "fileType")) ||
312  specifyDataTier);
313 
314  if (requireMetadata) {
315  fill_tiers_streams(vm, raw_config);
316  maybeThrowOnMissingMetadata(raw_config);
317  }
318 
319  string process_name;
320  if (exists_outside_prolog(raw_config, "process_name")) {
321  process_name = raw_config.get<string>("process_name");
322  }
323  if (requireMetadata && process_name.empty()) {
325  << "Non-empty / default process_name required for SAM metadata.\n";
326  }
327  if (wantSAMweb) {
328  raw_config.put(fhicl_key(ciLocation, "service_provider"),
329  "IFCatalogInterface");
330  raw_config.put(fhicl_key(ftLocation, "service_provider"), "IFFileTransfer");
331  art::ensureTable(raw_config, fhicl_key(services, "IFDH"));
332  }
333  return 0;
334 }
// ---------------------------------------------------------------------
// Cross-references attached to identifiers in the listing above
// (declaration, optional brief description, definition location where
// one was given):
//
//   FileCatalogOptionsHandler(bpo::options_description &desc)
//   bool exists_outside_prolog(fhicl::intermediate_table const &config, std::string const &key)
//   int doProcessOptions(bpo::variables_map const &vm, fhicl::intermediate_table &raw_config) override
//   int doCheckOptions(bpo::variables_map const &vm) override
//   Float_t tmp
//     Definition: plot.C:35
//   void ensureTable(fhicl::intermediate_table &table, std::string const &fhicl_spec)
//     Definition: ensureTable.cc:6
//   auto vector(Vector const &v)
//     Returns a manipulator which will print the specified array.
//     Definition: DumpUtils.h:289
//   std::enable_if_t< std::is_convertible_v< T, std::string >, std::string > fhicl_key(T const &name)
//     Definition: fhicl_key.h:12
//   shims::map< std::string, extended_value > table_t
//   bool put(std::string const &name, std::string const &value, bool in_prolog=false)
//   cet::coded_exception< errors::ErrorCodes, ExceptionDetail::translate > Exception
//     Definition: Exception.h:66
//   void erase(std::string const &key, bool in_prolog=false)
//   extended_value const & find(std::string const &key) const
//   T get(std::string const &name)
//   Float_t e
//     Definition: plot.C:35
//   decltype(auto) constexpr empty(T &&obj)
//     ADL-aware version of std::empty.
//     Definition: StdUtils.h:109
//   std::pair< std::string const, std::string > string_pair_t
// ---------------------------------------------------------------------