LArSoft  v09_90_00
Liquid Argon Software toolkit - https://larsoft.org/
c2numpy.h
Go to the documentation of this file.
1 // Copyright 2016 Jim Pivarski
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef C2NUMPY
16 #define C2NUMPY
17 
#include <inttypes.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#include <sstream>
#include <string>
#include <vector>
25 
// Library version string.  Declared with internal linkage (static, const
// pointer) so that including this header from several translation units does
// not produce multiple definitions of the same global.
static const char* const C2NUMPY_VERSION = "1.2";
27 
28 // http://docs.scipy.org/doc/numpy/user/basics.types.html
// Column element types.  The scalar entries are numbered consecutively from 0;
// fixed-width strings are encoded as C2NUMPY_STRING + width.
typedef enum {
  C2NUMPY_BOOL,       // Boolean (True or False) stored as a byte
  C2NUMPY_INT,        // Default integer type (same as C long; normally either int64 or int32)
  C2NUMPY_INTC,       // Identical to C int (normally int32 or int64)
  C2NUMPY_INTP,       // Integer used for indexing (same as C ssize_t; normally either int32 or int64)
  C2NUMPY_INT8,       // Byte (-128 to 127)
  C2NUMPY_INT16,      // Integer (-32768 to 32767)
  C2NUMPY_INT32,      // Integer (-2147483648 to 2147483647)
  C2NUMPY_INT64,      // Integer (-9223372036854775808 to 9223372036854775807)
  C2NUMPY_UINT8,      // Unsigned integer (0 to 255)
  C2NUMPY_UINT16,     // Unsigned integer (0 to 65535)
  C2NUMPY_UINT32,     // Unsigned integer (0 to 4294967295)
  C2NUMPY_UINT64,     // Unsigned integer (0 to 18446744073709551615)
  C2NUMPY_FLOAT,      // Shorthand for float64.
  C2NUMPY_FLOAT16,    // Half precision float: sign bit, 5 bits exponent, 10 bits mantissa
  C2NUMPY_FLOAT32,    // Single precision float: sign bit, 8 bits exponent, 23 bits mantissa
  C2NUMPY_FLOAT64,    // Double precision float: sign bit, 11 bits exponent, 52 bits mantissa
  C2NUMPY_COMPLEX,    // Shorthand for complex128.
  C2NUMPY_COMPLEX64,  // Complex number, represented by two 32-bit floats (real and imaginary components)
  C2NUMPY_COMPLEX128, // Complex number, represented by two 64-bit floats (real and imaginary components)

  C2NUMPY_STRING = 100, // strings are C2NUMPY_STRING + their fixed size (1 to 154)
  C2NUMPY_END = 255     // ensure that c2numpy_type is at least a byte
} c2numpy_type;
53 
54 // a Numpy writer object
55 typedef struct {
56  FILE* file; // output file handle
57  std::string outputFilePrefix; // output file name, not including the rotating number and .npy
58  int64_t sizeSeekPosition; // (internal) keep track of number of rows to modify before closing
59  int64_t sizeSeekSize; // (internal)
60 
61  int32_t numColumns; // number of columns in the record array
62  std::vector<std::string> columnNames; // column names
63  std::vector<c2numpy_type> columnTypes; // column types
64 
65  int32_t numRowsPerFile; // maximum number of rows per file
66  int32_t currentColumn; // current column number
67  int32_t currentRowInFile; // current row number in the current file
68  int32_t currentFileNumber; // current file number
70 
72 {
73  // FIXME: all of the "<" signs should be system-dependent (they mean little endian)
74  static const char* c2numpy_bool = "|b1";
75  static const char* c2numpy_int = "<i8";
76  static const char* c2numpy_intc = "<i4"; // FIXME: should be system-dependent
77  static const char* c2numpy_intp = "<i8"; // FIXME: should be system-dependent
78  static const char* c2numpy_int8 = "|i1";
79  static const char* c2numpy_int16 = "<i2";
80  static const char* c2numpy_int32 = "<i4";
81  static const char* c2numpy_int64 = "<i8";
82  static const char* c2numpy_uint8 = "|u1";
83  static const char* c2numpy_uint16 = "<u2";
84  static const char* c2numpy_uint32 = "<u4";
85  static const char* c2numpy_uint64 = "<u8";
86  static const char* c2numpy_float = "<f8";
87  static const char* c2numpy_float16 = "<f2";
88  static const char* c2numpy_float32 = "<f4";
89  static const char* c2numpy_float64 = "<f8";
90  static const char* c2numpy_complex = "<c16";
91  static const char* c2numpy_complex64 = "<c8";
92  static const char* c2numpy_complex128 = "<c16";
93 
94  static const char* c2numpy_str[155] = {
95  "|S0", "|S1", "|S2", "|S3", "|S4", "|S5", "|S6", "|S7", "|S8", "|S9",
96  "|S10", "|S11", "|S12", "|S13", "|S14", "|S15", "|S16", "|S17", "|S18", "|S19",
97  "|S20", "|S21", "|S22", "|S23", "|S24", "|S25", "|S26", "|S27", "|S28", "|S29",
98  "|S30", "|S31", "|S32", "|S33", "|S34", "|S35", "|S36", "|S37", "|S38", "|S39",
99  "|S40", "|S41", "|S42", "|S43", "|S44", "|S45", "|S46", "|S47", "|S48", "|S49",
100  "|S50", "|S51", "|S52", "|S53", "|S54", "|S55", "|S56", "|S57", "|S58", "|S59",
101  "|S60", "|S61", "|S62", "|S63", "|S64", "|S65", "|S66", "|S67", "|S68", "|S69",
102  "|S70", "|S71", "|S72", "|S73", "|S74", "|S75", "|S76", "|S77", "|S78", "|S79",
103  "|S80", "|S81", "|S82", "|S83", "|S84", "|S85", "|S86", "|S87", "|S88", "|S89",
104  "|S90", "|S91", "|S92", "|S93", "|S94", "|S95", "|S96", "|S97", "|S98", "|S99",
105  "|S100", "|S101", "|S102", "|S103", "|S104", "|S105", "|S106", "|S107", "|S108", "|S109",
106  "|S110", "|S111", "|S112", "|S113", "|S114", "|S115", "|S116", "|S117", "|S118", "|S119",
107  "|S120", "|S121", "|S122", "|S123", "|S124", "|S125", "|S126", "|S127", "|S128", "|S129",
108  "|S130", "|S131", "|S132", "|S133", "|S134", "|S135", "|S136", "|S137", "|S138", "|S139",
109  "|S140", "|S141", "|S142", "|S143", "|S144", "|S145", "|S146", "|S147", "|S148", "|S149",
110  "|S150", "|S151", "|S152", "|S153", "|S154"};
111 
112  switch (type) {
113  case C2NUMPY_BOOL: return c2numpy_bool;
114  case C2NUMPY_INT: return c2numpy_int;
115  case C2NUMPY_INTC: return c2numpy_intc;
116  case C2NUMPY_INTP: return c2numpy_intp;
117  case C2NUMPY_INT8: return c2numpy_int8;
118  case C2NUMPY_INT16: return c2numpy_int16;
119  case C2NUMPY_INT32: return c2numpy_int32;
120  case C2NUMPY_INT64: return c2numpy_int64;
121  case C2NUMPY_UINT8: return c2numpy_uint8;
122  case C2NUMPY_UINT16: return c2numpy_uint16;
123  case C2NUMPY_UINT32: return c2numpy_uint32;
124  case C2NUMPY_UINT64: return c2numpy_uint64;
125  case C2NUMPY_FLOAT: return c2numpy_float;
126  case C2NUMPY_FLOAT16: return c2numpy_float16;
127  case C2NUMPY_FLOAT32: return c2numpy_float32;
128  case C2NUMPY_FLOAT64: return c2numpy_float64;
129  case C2NUMPY_COMPLEX: return c2numpy_complex;
130  case C2NUMPY_COMPLEX64: return c2numpy_complex64;
131  case C2NUMPY_COMPLEX128: return c2numpy_complex128;
132  default:
133  if (0 < type - C2NUMPY_STRING && type - C2NUMPY_STRING < 155)
134  return c2numpy_str[type - C2NUMPY_STRING];
135  }
136 
137  return NULL;
138 }
139 
140 int c2numpy_init(c2numpy_writer* writer, const std::string outputFilePrefix, int32_t numRowsPerFile)
141 {
142  writer->file = NULL;
143  writer->outputFilePrefix = outputFilePrefix;
144  writer->sizeSeekPosition = 0;
145  writer->sizeSeekSize = 0;
146 
147  writer->numColumns = 0;
148 
149  writer->numRowsPerFile = numRowsPerFile;
150  writer->currentColumn = 0;
151  writer->currentRowInFile = 0;
152  writer->currentFileNumber = 0;
153 
154  return 0;
155 }
156 
157 int c2numpy_addcolumn(c2numpy_writer* writer, const std::string name, c2numpy_type type)
158 {
159  writer->numColumns += 1;
160  writer->columnNames.push_back(name);
161  writer->columnTypes.push_back(type);
162  return 0;
163 }
164 
166 {
167  std::stringstream fileNameStream;
168  fileNameStream << writer->outputFilePrefix;
169  fileNameStream << writer->currentFileNumber;
170  fileNameStream << ".npy";
171  std::string fileName = fileNameStream.str();
172  writer->file = fopen(fileName.c_str(), "wb");
173 
174  std::stringstream headerStream;
175  headerStream << "{'descr': [";
176 
177  int column;
178  for (column = 0; column < writer->numColumns; ++column) {
179  headerStream << "('" << writer->columnNames[column] << "', '"
180  << c2numpy_descr(writer->columnTypes[column]) << "')";
181  if (column < writer->numColumns - 1) headerStream << ", ";
182  }
183 
184  headerStream << "], 'fortran_order': False, 'shape': (";
185 
186  writer->sizeSeekPosition = headerStream.str().size();
187 
188  headerStream << writer->numRowsPerFile;
189 
190  writer->sizeSeekSize = headerStream.str().size() - writer->sizeSeekPosition;
191 
192  headerStream << ",), }";
193 
194  int headerSize = headerStream.str().size();
195  char version = 1;
196 
197  if (headerSize > 65535) version = 2;
198  while ((6 + 2 + (version == 1 ? 2 : 4) + headerSize) % 16 != 0) {
199  headerSize += 1;
200  headerStream << " ";
201  if (headerSize > 65535) version = 2;
202  }
203 
204  fwrite("\x93NUMPY", 1, 6, writer->file);
205  if (version == 1) {
206  fwrite("\x01\x00", 1, 2, writer->file);
207  fwrite(&headerSize, 1, 2, writer->file);
208  writer->sizeSeekPosition += 6 + 2 + 2;
209  }
210  else {
211  fwrite("\x02\x00", 1, 2, writer->file);
212  fwrite(&headerSize, 1, 4, writer->file);
213  writer->sizeSeekPosition += 6 + 2 + 4;
214  }
215 
216  std::string header = headerStream.str();
217  fwrite(header.c_str(), 1, header.size(), writer->file);
218 
219  return 0;
220 }
221 
// Prologue for the per-item writer functions.  Expects a variable named
// `writer` in scope.  If no file is currently open it calls c2numpy_open and,
// on a non-zero status, returns that status from the enclosing function.
// Used as a bare statement (no trailing semicolon).
#define C2NUMPY_CHECK_ITEM                     \
  {                                            \
    if (writer->file == NULL) {                \
      int status = c2numpy_open(writer);       \
      if (status != 0) return status;          \
    }                                          \
  }
229 
// Epilogue for the per-item writer functions.  Expects a variable named
// `writer` in scope and must be the last statement of the enclosing function,
// because it always returns 0 from it.
// When the column cursor has wrapped back to 0 a row has been completed: the
// row counter is advanced and, once numRowsPerFile rows are in the file, the
// file is closed and the file number incremented.
#define C2NUMPY_INCREMENT_ITEM                                   \
  {                                                              \
    if (writer->currentColumn == 0) {                            \
      writer->currentRowInFile += 1;                             \
      if (writer->currentRowInFile == writer->numRowsPerFile) {  \
        fclose(writer->file);                                    \
        writer->file = NULL;                                     \
        writer->currentRowInFile = 0;                            \
        writer->currentFileNumber += 1;                          \
      }                                                          \
    }                                                            \
    return 0;                                                    \
  }
243 
244 int c2numpy_bool(c2numpy_writer* writer, int8_t data)
245 { // "bool" is just a byte
247  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_BOOL) return -1;
248  fwrite(&data, sizeof(int8_t), 1, writer->file);
249  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
251 }
252 
253 int c2numpy_int(c2numpy_writer* writer, int64_t data)
254 { // Numpy's default int is 64-bit
256  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INT) return -1;
257  fwrite(&data, sizeof(int64_t), 1, writer->file);
258  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
260 }
261 
262 int c2numpy_intc(c2numpy_writer* writer, int data)
263 { // the built-in C int
265  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INTC) return -1;
266  fwrite(&data, sizeof(int), 1, writer->file);
267  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
269 }
270 
271 int c2numpy_intp(c2numpy_writer* writer, size_t data)
272 { // intp is Numpy's way of saying size_t
274  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INTP) return -1;
275  fwrite(&data, sizeof(size_t), 1, writer->file);
276  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
278 }
279 
280 int c2numpy_int8(c2numpy_writer* writer, int8_t data)
281 {
283  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INT8) return -1;
284  fwrite(&data, sizeof(int8_t), 1, writer->file);
285  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
287 }
288 
289 int c2numpy_int16(c2numpy_writer* writer, int16_t data)
290 {
292  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INT16) return -1;
293  fwrite(&data, sizeof(int16_t), 1, writer->file);
294  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
296 }
297 
298 int c2numpy_int32(c2numpy_writer* writer, int32_t data)
299 {
301  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INT32) return -1;
302  fwrite(&data, sizeof(int32_t), 1, writer->file);
303  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
305 }
306 
307 int c2numpy_int64(c2numpy_writer* writer, int64_t data)
308 {
310  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INT64) return -1;
311  fwrite(&data, sizeof(int64_t), 1, writer->file);
312  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
314 }
315 
316 int c2numpy_uint8(c2numpy_writer* writer, uint8_t data)
317 {
319  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_UINT8) return -1;
320  fwrite(&data, sizeof(uint8_t), 1, writer->file);
321  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
323 }
324 
325 int c2numpy_uint16(c2numpy_writer* writer, uint16_t data)
326 {
328  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_UINT16) return -1;
329  fwrite(&data, sizeof(uint16_t), 1, writer->file);
330  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
332 }
333 
334 int c2numpy_uint32(c2numpy_writer* writer, uint32_t data)
335 {
337  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_UINT32) return -1;
338  fwrite(&data, sizeof(uint32_t), 1, writer->file);
339  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
341 }
342 
343 int c2numpy_uint64(c2numpy_writer* writer, uint64_t data)
344 {
346  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_UINT64) return -1;
347  fwrite(&data, sizeof(uint64_t), 1, writer->file);
348  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
350 }
351 
352 int c2numpy_float(c2numpy_writer* writer, double data)
353 { // Numpy's "float" is a double
355  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_FLOAT) return -1;
356  fwrite(&data, sizeof(double), 1, writer->file);
357  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
359 }
360 
361 // int c2numpy_float16(c2numpy_writer *writer, ??? data) { // how to do float16 in C?
362 // C2NUMPY_CHECK_ITEM
363 // if (writer->columnTypes[writer->currentColumn] != C2NUMPY_FLOAT16) return -1;
364 // fwrite(&data, sizeof(???), 1, writer->file);
365 // writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
366 // C2NUMPY_INCREMENT_ITEM
367 // }
368 
369 int c2numpy_float32(c2numpy_writer* writer, float data)
370 {
372  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_FLOAT32) return -1;
373  fwrite(&data, sizeof(float), 1, writer->file);
374  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
376 }
377 
378 int c2numpy_float64(c2numpy_writer* writer, double data)
379 {
381  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_FLOAT64) return -1;
382  fwrite(&data, sizeof(double), 1, writer->file);
383  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
385 }
386 
387 // int c2numpy_complex(c2numpy_writer *writer, ??? data) { // how to do complex in C?
388 // C2NUMPY_CHECK_ITEM
389 // if (writer->columnTypes[writer->currentColumn] != C2NUMPY_COMPLEX) return -1;
390 // fwrite(&data, sizeof(???), 1, writer->file);
391 // writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
392 // C2NUMPY_INCREMENT_ITEM
393 // }
394 
395 // int c2numpy_complex64(c2numpy_writer *writer, ??? data) {
396 // C2NUMPY_CHECK_ITEM
397 // if (writer->columnTypes[writer->currentColumn] != C2NUMPY_COMPLEX64) return -1;
398 // fwrite(&data, sizeof(???), 1, writer->file);
399 // writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
400 // C2NUMPY_INCREMENT_ITEM
401 // }
402 
403 // int c2numpy_complex128(c2numpy_writer *writer, ??? data) {
404 // C2NUMPY_CHECK_ITEM
405 // if (writer->columnTypes[writer->currentColumn] != C2NUMPY_COMPLEX128) return -1;
406 // fwrite(&data, sizeof(???), 1, writer->file);
407 // writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
408 // C2NUMPY_INCREMENT_ITEM
409 // }
410 
411 int c2numpy_string(c2numpy_writer* writer, const char* data)
412 {
414 
415  int stringlength = writer->columnTypes[writer->currentColumn] - C2NUMPY_STRING;
416  if (0 < stringlength && stringlength < 155)
417  fwrite(data, 1, stringlength, writer->file);
418  else
419  return -1;
420  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
421 
423 }
424 
426 {
427  if (writer->file != NULL) {
428  // we wrote fewer rows than we promised
429  if (writer->currentRowInFile < writer->numRowsPerFile) {
430  // so go back to the part of the header where that was written
431  fseek(writer->file, writer->sizeSeekPosition, SEEK_SET);
432  // overwrite it with spaces
433  int i;
434  for (i = 0; i < writer->sizeSeekSize; ++i)
435  fputc(' ', writer->file);
436  // now go back and write it again (it MUST be fewer or an equal number of digits)
437  fseek(writer->file, writer->sizeSeekPosition, SEEK_SET);
438  fprintf(writer->file, "%d", writer->currentRowInFile);
439  }
440  // now close it
441  fclose(writer->file);
442  }
443 
444  return 0;
445 }
446 
447 #endif // C2NUMPY
int c2numpy_init(c2numpy_writer *writer, const std::string outputFilePrefix, int32_t numRowsPerFile)
Definition: c2numpy.h:140
int c2numpy_int32(c2numpy_writer *writer, int32_t data)
Definition: c2numpy.h:298
#define C2NUMPY_INCREMENT_ITEM
Definition: c2numpy.h:230
c2numpy_type
Definition: c2numpy.h:29
int32_t currentFileNumber
Definition: c2numpy.h:68
int c2numpy_int8(c2numpy_writer *writer, int8_t data)
Definition: c2numpy.h:280
int c2numpy_bool(c2numpy_writer *writer, int8_t data)
Definition: c2numpy.h:244
int c2numpy_uint8(c2numpy_writer *writer, uint8_t data)
Definition: c2numpy.h:316
int c2numpy_open(c2numpy_writer *writer)
Definition: c2numpy.h:165
int32_t numRowsPerFile
Definition: c2numpy.h:65
int32_t currentColumn
Definition: c2numpy.h:66
int c2numpy_close(c2numpy_writer *writer)
Definition: c2numpy.h:425
fclose(fg1)
#define C2NUMPY_CHECK_ITEM
Definition: c2numpy.h:222
int c2numpy_addcolumn(c2numpy_writer *writer, const std::string name, c2numpy_type type)
Definition: c2numpy.h:157
int c2numpy_float(c2numpy_writer *writer, double data)
Definition: c2numpy.h:352
std::vector< c2numpy_type > columnTypes
Definition: c2numpy.h:63
int64_t sizeSeekSize
Definition: c2numpy.h:59
FILE * file
Definition: c2numpy.h:56
int c2numpy_int64(c2numpy_writer *writer, int64_t data)
Definition: c2numpy.h:307
const char * C2NUMPY_VERSION
Definition: c2numpy.h:26
int c2numpy_uint32(c2numpy_writer *writer, uint32_t data)
Definition: c2numpy.h:334
int c2numpy_float32(c2numpy_writer *writer, float data)
Definition: c2numpy.h:369
std::string outputFilePrefix
Definition: c2numpy.h:57
int64_t sizeSeekPosition
Definition: c2numpy.h:58
int32_t currentRowInFile
Definition: c2numpy.h:67
int c2numpy_uint16(c2numpy_writer *writer, uint16_t data)
Definition: c2numpy.h:325
int c2numpy_int16(c2numpy_writer *writer, int16_t data)
Definition: c2numpy.h:289
int c2numpy_intc(c2numpy_writer *writer, int data)
Definition: c2numpy.h:262
int c2numpy_float64(c2numpy_writer *writer, double data)
Definition: c2numpy.h:378
const char * c2numpy_descr(c2numpy_type type)
Definition: c2numpy.h:71
int c2numpy_int(c2numpy_writer *writer, int64_t data)
Definition: c2numpy.h:253
int c2numpy_uint64(c2numpy_writer *writer, uint64_t data)
Definition: c2numpy.h:343
int c2numpy_string(c2numpy_writer *writer, const char *data)
Definition: c2numpy.h:411
int c2numpy_intp(c2numpy_writer *writer, size_t data)
Definition: c2numpy.h:271
int32_t numColumns
Definition: c2numpy.h:61
std::vector< std::string > columnNames
Definition: c2numpy.h:62