1 package org.djutils.data.csv;
2
3 import java.io.FileReader;
4 import java.io.FileWriter;
5 import java.io.IOException;
6 import java.io.InputStreamReader;
7 import java.io.Reader;
8 import java.io.Writer;
9 import java.util.ArrayList;
10 import java.util.Iterator;
11 import java.util.LinkedHashMap;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Set;
15 import java.util.function.Consumer;
16 import java.util.zip.ZipEntry;
17 import java.util.zip.ZipFile;
18
19 import org.djutils.data.Column;
20 import org.djutils.data.ListTable;
21 import org.djutils.data.Row;
22 import org.djutils.data.Table;
23 import org.djutils.data.serialization.TextSerializationException;
24 import org.djutils.data.serialization.TextSerializer;
25 import org.djutils.exceptions.Throw;
26 import org.djutils.io.CompressedFileWriter;
27 import org.djutils.primitives.Primitive;
28
29 import de.siegmar.fastcsv.reader.NamedCsvReader;
30 import de.siegmar.fastcsv.reader.NamedCsvRow;
31 import de.siegmar.fastcsv.writer.CsvWriter;
32 import de.siegmar.fastcsv.writer.LineDelimiter;
33
34 /**
35 * CsvData takes care of reading and writing of table data in CSV format. The class can be used, e.g., as follows:
36 *
37 * <pre>
 * Table table = new ListTable("data", "Table", columns);
 * Writer writer = new FileWriter("c:/data/data.csv");
 * Writer metaWriter = new FileWriter("c:/data/data.meta.csv");
 * CsvData.writeData(writer, metaWriter, table);
42 * </pre>
43 * <p>
44 * Copyright (c) 2020-2023 Delft University of Technology, PO Box 5, 2600 AA, Delft, the Netherlands. All rights reserved. <br>
45 * BSD-style license. See <a href="https://djutils.org/docs/current/djutils/licenses.html">DJUTILS License</a>.
46 * </p>
47 * @author <a href="https://github.com/averbraeck">Alexander Verbraeck</a>
48 * @author <a href="https://tudelft.nl/staff/p.knoppers-1">Peter Knoppers</a>
49 * @author <a href="https://dittlab.tudelft.nl">Wouter Schakel</a>
50 */
51 public final class CsvData
52 {
/**
 * Hidden constructor; CsvData only offers static methods and is not meant to be instantiated.
 */
private CsvData()
{
    // intentionally empty
}
60
61 /**
62 * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
63 * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
64 * header contains the id, description, and class of the data table itself. The second and further rows contain information
65 * about the columns of the data table.
66 * @param writer Writer; the writer that writes the data, e.g. to a file
67 * @param metaWriter Writer; the writer for the metadata
68 * @param table Table; the data table to write
69 * @param separator char; the delimiter to use for separating entries
70 * @param quotechar char; the character to use for quoted elements
71 * @param lineDelimiter String; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
72 * @throws IOException on I/O error when writing the data
73 * @throws TextSerializationException on unknown data type for serialization
74 */
75 public static void writeData(final Writer writer, final Writer metaWriter, final Table table, final char separator,
76 final char quotechar, final LineDelimiter lineDelimiter) throws IOException, TextSerializationException
77 {
78 writeMeta(metaWriter, true, table, separator, quotechar, lineDelimiter);
79 writeData(writer, true, table, separator, quotechar, lineDelimiter);
80 }
81
82 /**
83 * Write the data from the data table in CSV format. The data file and meta data file are zipped. The metadata consists of a
84 * CSV file with three columns: the id, the description, and the class. The first row after the header contains the id,
85 * description, and class of the data table itself. The second and further rows contain information about the columns of the
86 * data table.
87 * @param writer Writer; the writer that writes the data, e.g. to a file
88 * @param csvName String; name of the csv file within the zip file
89 * @param metaName String; name of the meta data file within the zip file
90 * @param table Table; the data table to write
91 * @param separator char; the delimiter to use for separating entries
92 * @param quotechar char; the character to use for quoted elements
93 * @param lineDelimiter String; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
94 * @throws IOException on I/O error when writing the data
95 * @throws TextSerializationException on unknown data type for serialization
96 */
97 public static void writeZippedData(final CompressedFileWriter writer, final String csvName, final String metaName,
98 final Table table, final char separator, final char quotechar, final LineDelimiter lineDelimiter)
99 throws IOException, TextSerializationException
100 {
101 writeMeta(writer.next(metaName), false, table, separator, quotechar, lineDelimiter);
102 writeData(writer.next(csvName), true, table, separator, quotechar, lineDelimiter);
103 }
104
105 /**
106 * Write the data from the data table in CSV format. The data file and meta data file are zipped. The metadata consists of a
107 * CSV file with three columns: the id, the description, and the class. The first row after the header contains the id,
108 * description, and class of the data table itself. The second and further rows contain information about the columns of the
109 * data table.
110 * @param writer Writer; the writer that writes the data, e.g. to a file
111 * @param csvName String; name of the CSV file within the zip file
112 * @param metaName String; name of the meta data file within the zip file
113 * @param table Table; the data table to write
114 * @throws IOException on I/O error when writing the data
115 * @throws TextSerializationException on unknown data type for serialization
116 */
117 public static void writeZippedData(final CompressedFileWriter writer, final String csvName, final String metaName,
118 final Table table) throws IOException, TextSerializationException
119 {
120 writeZippedData(writer, csvName, metaName, table, ',', '"', LineDelimiter.CRLF);
121 }
122
123 /**
124 * Writes the meta data.
125 * @param metaWriter Writer; the writer for the metadata
126 * @param closeWriter boolean; whether to close the stream
127 * @param table Table; the data table to write
128 * @param separator char; the delimiter to use for separating entries
129 * @param quotechar char; the character to use for quoted elements
130 * @param lineDelimiter String; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
131 * @throws IOException on I/O error when writing the data
132 */
133 private static void writeMeta(final Writer metaWriter, final boolean closeWriter, final Table table, final char separator,
134 final char quotechar, final LineDelimiter lineDelimiter) throws IOException
135 {
136 CsvWriter csvMetaWriter = null;
137 try
138 {
139 csvMetaWriter = CsvWriter.builder().fieldSeparator(separator).quoteCharacter(quotechar).lineDelimiter(lineDelimiter)
140 .build(metaWriter);
141 csvMetaWriter.writeRow("id", "description", "className", "unit");
142 csvMetaWriter.writeRow(table.getId(), table.getDescription(), table.getClass().getName(), "");
143 for (Column<?> column : table.getColumns())
144 {
145 if (column.getUnit() == null)
146 {
147 csvMetaWriter.writeRow(column.getId(), column.getDescription(), column.getValueType().getName(), "");
148 }
149 else
150 {
151 csvMetaWriter.writeRow(column.getId(), column.getDescription(), column.getValueType().getName(),
152 column.getUnit());
153 }
154 }
155 }
156 finally
157 {
158 if (closeWriter && csvMetaWriter != null)
159 {
160 csvMetaWriter.close();
161 }
162 }
163 }
164
165 /**
166 * Writes the data.
167 * @param writer Writer; the writer that writes the data, e.g. to a file
168 * @param closeWriter boolean; whether to close the stream
169 * @param table Table; the data table to write
170 * @param separator char; the delimiter to use for separating entries
171 * @param quotechar char; the character to use for quoted elements
172 * @param lineDelimiter String; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
173 * @throws IOException on I/O error when writing the data
174 * @throws TextSerializationException on unknown data type for serialization
175 */
176 private static void writeData(final Writer writer, final boolean closeWriter, final Table table, final char separator,
177 final char quotechar, final LineDelimiter lineDelimiter) throws IOException, TextSerializationException
178 {
179 // Assemble the serializer array
180 TextSerializer<?>[] serializers = new TextSerializer[table.getNumberOfColumns()];
181 for (int i = 0; i < table.getNumberOfColumns(); i++)
182 {
183 Column<?> column = table.getColumns().get(i);
184 serializers[i] = TextSerializer.resolve(column.getValueType());
185 }
186
187 // Write the data file
188 CsvWriter csvWriter = null;
189 try
190 {
191 csvWriter = CsvWriter.builder().fieldSeparator(separator).quoteCharacter(quotechar).lineDelimiter(lineDelimiter)
192 .build(writer);
193 csvWriter.writeRow(table.getColumnIds());
194 String[] textFields = new String[table.getNumberOfColumns()];
195 for (Row row : table)
196 {
197 Object[] values = row.getValues();
198 for (int i = 0; i < table.getNumberOfColumns(); i++)
199 {
200 textFields[i] = TextSerializer.serialize(serializers[i], values[i], table.getColumn(i).getUnit());
201 }
202 csvWriter.writeRow(textFields);
203 }
204 }
205 finally
206 {
207 if (closeWriter && csvWriter != null)
208 {
209 csvWriter.close();
210 }
211 }
212 }
213
214 /**
215 * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
216 * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
217 * header contains the id, description, and class of the data table itself. The second and further rows contain information
218 * about the columns of the data table. The line ending used will be CRLF which is RFC 4180 compliant.
219 * @param writer Writer; the writer that writes the data, e.g. to a file
220 * @param metaWriter Writer; the writer for the metadata
221 * @param table Table; the data table to write
222 * @throws IOException on I/O error when writing the data
223 * @throws TextSerializationException on unknown data type for serialization
224 */
225 public static void writeData(final Writer writer, final Writer metaWriter, final Table table)
226 throws IOException, TextSerializationException
227 {
228 writeData(writer, metaWriter, table, ',', '"', LineDelimiter.CRLF);
229 }
230
231 /**
232 * Write the data from the data table in CSV format.
233 * @param filename String; the file name to write the data to
234 * @param metaFilename String; the file name to write the metadata to
235 * @param table Table; the data table to write
236 * @throws IOException on I/O error when writing the data
237 * @throws TextSerializationException on unknown data type for serialization
238 */
239 public static void writeData(final String filename, final String metaFilename, final Table table)
240 throws IOException, TextSerializationException
241 {
242 try (FileWriter fw = new FileWriter(filename); FileWriter mfw = new FileWriter(metaFilename);)
243 {
244 writeData(fw, mfw, table);
245 }
246 }
247
248 /**
249 * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
250 * @param reader Reader; the reader that can read the data, e.g. from a file
251 * @param metaReader Reader; the writer for the metadata
252 * @return Table the data table reconstructed from the meta data and filled with the data
253 * @param separator char; the delimiter to use for separating entries
254 * @param quotechar char; the character to use for quoted elements
255 * @throws IOException when the CSV data was not formatted right
256 * @throws TextSerializationException on unknown data type for serialization
257 */
258 public static Table readData(final Reader reader, final Reader metaReader, final char separator, final char quotechar)
259 throws IOException, TextSerializationException
260 {
261 // Read the metadata file and reconstruct the data table
262 try (NamedCsvReader csvMetaReader =
263 NamedCsvReader.builder().fieldSeparator(separator).quoteCharacter(quotechar).build(metaReader))
264 {
265 Set<String> metaHeader = csvMetaReader.getHeader();
266 Throw.when(
267 metaHeader.size() != 4 || !metaHeader.contains("id") || !metaHeader.contains("description")
268 || !metaHeader.contains("className") || !metaHeader.contains("unit"),
269 IOException.class,
270 "header of the metafile does not contain 'id, description, className, unit' as fields, but %s: ",
271 metaHeader);
272
273 // table metadata
274 List<Column<?>> columns = new ArrayList<>();
275 Map<String, String> tableRow = new LinkedHashMap<>();
276 Iterator<NamedCsvRow> it = csvMetaReader.iterator();
277 while (it.hasNext())
278 {
279 NamedCsvRow row = it.next();
280 // table metadata
281 if (tableRow.size() == 0)
282 {
283 tableRow.putAll(row.getFields());
284 }
285 else
286 {
287 // column metadata
288 String type = row.getField("className");
289 Class<?> valueClass = Primitive.forName(type);
290 if (valueClass == null)
291 {
292 try
293 {
294 valueClass = Class.forName(type);
295 }
296 catch (ClassNotFoundException exception)
297 {
298 throw new IOException("Could not find class " + type, exception);
299 }
300 }
301 Column<?> column =
302 new Column<>(row.getField("id"), row.getField("description"), valueClass, row.getField("unit"));
303 columns.add(column);
304 }
305 }
306
307 Throw.when(tableRow == null, IOException.class, "no table information in the metafile");
308
309 // create table
310 Table table;
311 Consumer<Object[]> unserializableTable;
312 if (tableRow.get("className").equals(ListTable.class.getName()))
313 {
314 ListTable listTable = new ListTable(tableRow.get("id"), tableRow.get("description"), columns);
315 table = listTable;
316 unserializableTable = (data) -> listTable.addRow(data);
317 }
318 else
319 {
320 // fallback
321 ListTable listTable = new ListTable(tableRow.get("id"), tableRow.get("description"), columns);
322 table = listTable;
323 unserializableTable = (data) -> listTable.addRow(data);
324 }
325
326 // Assemble the serializer array
327 TextSerializer<?>[] serializers = new TextSerializer[table.getNumberOfColumns()];
328 for (int i = 0; i < table.getNumberOfColumns(); i++)
329 {
330 serializers[i] = TextSerializer.resolve(columns.get(i).getValueType());
331 }
332
333 // Read the data file
334 try (NamedCsvReader csvReader =
335 NamedCsvReader.builder().fieldSeparator(separator).quoteCharacter(quotechar).build(reader))
336 {
337 Set<String> header = csvReader.getHeader();
338 Throw.when(header.size() != columns.size(), IOException.class,
339 "Number of columns in the data file does not match column metadata size");
340 for (int i = 0; i < columns.size(); i++)
341 {
342 Throw.when(!header.contains(columns.get(i).getId()), IOException.class,
343 "Header with id %s not found in the data file", columns.get(i).getId());
344 }
345
346 // Read the data file records
347 csvReader.forEach(row ->
348 {
349 Object[] values = new Object[columns.size()];
350 for (int i = 0; i < columns.size(); i++)
351 {
352 values[i] = TextSerializer.deserialize(serializers[i], row.getField(columns.get(i).getId()),
353 columns.get(i));
354 }
355 unserializableTable.accept(values); // addRow
356 });
357 return table;
358 }
359 }
360 }
361
362 /**
363 * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
364 * @param reader Reader; the reader that can read the data, e.g. from a file
365 * @param metaReader Reader; the writer for the metadata
366 * @return Table the data table reconstructed from the meta data and filled with the data
367 * @throws IOException when the CSV data was not formatted right
368 * @throws TextSerializationException on unknown data type for serialization
369 */
370 public static Table readData(final Reader reader, final Reader metaReader) throws IOException, TextSerializationException
371 {
372 return readData(reader, metaReader, ',', '"');
373 }
374
375 /**
376 * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
377 * @param filename String; the file name to read the data from
378 * @param metaFilename String; the file name to read the metadata from
379 * @return Table the data table reconstructed from the meta data and filled with the data
380 * @throws IOException when the CSV data was not formatted right
381 * @throws TextSerializationException on unknown data type for serialization
382 */
383 public static Table readData(final String filename, final String metaFilename)
384 throws IOException, TextSerializationException
385 {
386 try (FileReader fr = new FileReader(filename); FileReader mfr = new FileReader(metaFilename);)
387 {
388 return readData(fr, mfr);
389 }
390 }
391
392 /**
393 * Read the data from a CSV-file inside a zip file. The metadata file should be in the same zipfile. Use the metadata to
394 * reconstruct the data table.
395 * @param fileName String; file name of the zip file
396 * @param csvName String; name of the CSV-file, without path
397 * @param metaName String; name of the metadata file, without path
398 * @return Table the data table reconstructed from the meta data and filled with the data
399 * @throws IOException when the CSV data was not formatted right
400 * @throws TextSerializationException on unknown data type for serialization
401 */
402 public static Table readZippedData(final String fileName, final String csvName, final String metaName)
403 throws IOException, TextSerializationException
404 {
405 return readZippedData(fileName, csvName, metaName, ',', '"');
406 }
407
408 /**
409 * Read the data from a CSV-file inside a zip file. The metadata file should be in the same zipfile. Use the metadata to
410 * reconstruct the data table.
411 * @param fileName String; file name of the zip file
412 * @param csvName String; name of the CSV-file, without path
413 * @param metaName String; name of the metadata file, without path
414 * @param separator char; the delimiter to use for separating entries
415 * @param quotechar char; the character to use for quoted elements
416 * @return Table the data table reconstructed from the meta data and filled with the data
417 * @throws IOException when the CSV data was not formatted right
418 * @throws TextSerializationException on unknown data type for serialization
419 */
420 public static Table readZippedData(final String fileName, final String csvName, final String metaName, final char separator,
421 final char quotechar) throws IOException, TextSerializationException
422 {
423 try (ZipFile zipFile = new ZipFile(fileName))
424 {
425 Reader reader = null;
426 Reader metaReader = null;
427 Iterator<? extends ZipEntry> iterator = zipFile.entries().asIterator();
428 while (iterator.hasNext())
429 {
430 ZipEntry zipEntry = iterator.next();
431 if (zipEntry.getName().equals(csvName))
432 {
433 reader = new InputStreamReader(zipFile.getInputStream(zipEntry));
434 }
435 else if (zipEntry.getName().equals(metaName))
436 {
437 metaReader = new InputStreamReader(zipFile.getInputStream(zipEntry));
438 }
439 }
440 Throw.whenNull(reader, "File %s not found in %s.", csvName, fileName);
441 Throw.whenNull(metaReader, "File %s not found in %s.", metaName, fileName);
442 return readData(reader, metaReader, separator, quotechar);
443 }
444 }
445
446 }