View Javadoc
1   package org.djutils.data.csv;
2   
3   import java.io.FileReader;
4   import java.io.FileWriter;
5   import java.io.IOException;
6   import java.io.InputStreamReader;
7   import java.io.Reader;
8   import java.io.Writer;
9   import java.util.ArrayList;
10  import java.util.Iterator;
11  import java.util.LinkedHashMap;
12  import java.util.List;
13  import java.util.Map;
14  import java.util.Set;
15  import java.util.function.Consumer;
16  import java.util.zip.ZipEntry;
17  import java.util.zip.ZipFile;
18  
19  import org.djutils.data.Column;
20  import org.djutils.data.ListTable;
21  import org.djutils.data.Row;
22  import org.djutils.data.Table;
23  import org.djutils.data.serialization.TextSerializationException;
24  import org.djutils.data.serialization.TextSerializer;
25  import org.djutils.exceptions.Throw;
26  import org.djutils.io.CompressedFileWriter;
27  import org.djutils.primitives.Primitive;
28  
29  import de.siegmar.fastcsv.reader.NamedCsvReader;
30  import de.siegmar.fastcsv.reader.NamedCsvRow;
31  import de.siegmar.fastcsv.writer.CsvWriter;
32  import de.siegmar.fastcsv.writer.LineDelimiter;
33  
34  /**
35   * CsvData takes care of reading and writing of table data in CSV format. The class can be used, e.g., as follows:
36   * 
37   * <pre>
38   * Table Table = new ListTable("data", "Table", columns);
39   * Writer writer = new FileWriter("c:/data/data.csv");
40   * Writer metaWriter = new FileWriter("c:/data/data.meta.csv");
41   * CsvData.writeData(writer, metaWriter, Table);
42   * </pre>
43   * <p>
44   * Copyright (c) 2020-2024 Delft University of Technology, PO Box 5, 2600 AA, Delft, the Netherlands. All rights reserved. <br>
45   * BSD-style license. See <a href="https://djutils.org/docs/current/djutils/licenses.html">DJUTILS License</a>.
46   * </p>
47   * @author <a href="https://github.com/averbraeck">Alexander Verbraeck</a>
48   * @author <a href="https://tudelft.nl/staff/p.knoppers-1">Peter Knoppers</a>
49   * @author <a href="https://dittlab.tudelft.nl">Wouter Schakel</a>
50   */
51  public final class CsvData
52  {
53      /**
54       * Utility class, no public constructor.
55       */
56      private CsvData()
57      {
58          // utility class
59      }
60  
61      /**
62       * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
63       * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
64       * header contains the id, description, and class of the data table itself. The second and further rows contain information
65       * about the columns of the data table.
66       * @param writer Writer; the writer that writes the data, e.g. to a file
67       * @param metaWriter Writer; the writer for the metadata
68       * @param table Table; the data table to write
69       * @param separator char; the delimiter to use for separating entries
70       * @param quotechar char; the character to use for quoted elements
71       * @param lineDelimiter String; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
72       * @throws IOException on I/O error when writing the data
73       * @throws TextSerializationException on unknown data type for serialization
74       */
75      public static void writeData(final Writer writer, final Writer metaWriter, final Table table, final char separator,
76              final char quotechar, final LineDelimiter lineDelimiter) throws IOException, TextSerializationException
77      {
78          writeMeta(metaWriter, true, table, separator, quotechar, lineDelimiter);
79          writeData(writer, true, table, separator, quotechar, lineDelimiter);
80      }
81  
82      /**
83       * Write the data from the data table in CSV format. The data file and meta data file are zipped. The metadata consists of a
84       * CSV file with three columns: the id, the description, and the class. The first row after the header contains the id,
85       * description, and class of the data table itself. The second and further rows contain information about the columns of the
86       * data table.
87       * @param writer Writer; the writer that writes the data, e.g. to a file
88       * @param csvName String; name of the csv file within the zip file
89       * @param metaName String; name of the meta data file within the zip file
90       * @param table Table; the data table to write
91       * @param separator char; the delimiter to use for separating entries
92       * @param quotechar char; the character to use for quoted elements
93       * @param lineDelimiter String; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
94       * @throws IOException on I/O error when writing the data
95       * @throws TextSerializationException on unknown data type for serialization
96       */
97      public static void writeZippedData(final CompressedFileWriter writer, final String csvName, final String metaName,
98              final Table table, final char separator, final char quotechar, final LineDelimiter lineDelimiter)
99              throws IOException, TextSerializationException
100     {
101         writeMeta(writer.next(metaName), false, table, separator, quotechar, lineDelimiter);
102         writeData(writer.next(csvName), true, table, separator, quotechar, lineDelimiter);
103     }
104 
105     /**
106      * Write the data from the data table in CSV format. The data file and meta data file are zipped. The metadata consists of a
107      * CSV file with three columns: the id, the description, and the class. The first row after the header contains the id,
108      * description, and class of the data table itself. The second and further rows contain information about the columns of the
109      * data table.
110      * @param writer Writer; the writer that writes the data, e.g. to a file
111      * @param csvName String; name of the CSV file within the zip file
112      * @param metaName String; name of the meta data file within the zip file
113      * @param table Table; the data table to write
114      * @throws IOException on I/O error when writing the data
115      * @throws TextSerializationException on unknown data type for serialization
116      */
117     public static void writeZippedData(final CompressedFileWriter writer, final String csvName, final String metaName,
118             final Table table) throws IOException, TextSerializationException
119     {
120         writeZippedData(writer, csvName, metaName, table, ',', '"', LineDelimiter.CRLF);
121     }
122 
123     /**
124      * Writes the meta data.
125      * @param metaWriter Writer; the writer for the metadata
126      * @param closeWriter boolean; whether to close the stream
127      * @param table Table; the data table to write
128      * @param separator char; the delimiter to use for separating entries
129      * @param quotechar char; the character to use for quoted elements
130      * @param lineDelimiter String; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
131      * @throws IOException on I/O error when writing the data
132      */
133     private static void writeMeta(final Writer metaWriter, final boolean closeWriter, final Table table, final char separator,
134             final char quotechar, final LineDelimiter lineDelimiter) throws IOException
135     {
136         CsvWriter csvMetaWriter = null;
137         try
138         {
139             csvMetaWriter = CsvWriter.builder().fieldSeparator(separator).quoteCharacter(quotechar).lineDelimiter(lineDelimiter)
140                     .build(metaWriter);
141             csvMetaWriter.writeRow("id", "description", "className", "unit");
142             csvMetaWriter.writeRow(table.getId(), table.getDescription(), table.getClass().getName(), "");
143             for (Column<?> column : table.getColumns())
144             {
145                 if (column.getUnit() == null)
146                 {
147                     csvMetaWriter.writeRow(column.getId(), column.getDescription(), column.getValueType().getName(), "");
148                 }
149                 else
150                 {
151                     csvMetaWriter.writeRow(column.getId(), column.getDescription(), column.getValueType().getName(),
152                             column.getUnit());
153                 }
154             }
155         }
156         finally
157         {
158             if (closeWriter && csvMetaWriter != null)
159             {
160                 csvMetaWriter.close();
161             }
162         }
163     }
164 
165     /**
166      * Writes the data.
167      * @param writer Writer; the writer that writes the data, e.g. to a file
168      * @param closeWriter boolean; whether to close the stream
169      * @param table Table; the data table to write
170      * @param separator char; the delimiter to use for separating entries
171      * @param quotechar char; the character to use for quoted elements
172      * @param lineDelimiter String; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
173      * @throws IOException on I/O error when writing the data
174      * @throws TextSerializationException on unknown data type for serialization
175      */
176     private static void writeData(final Writer writer, final boolean closeWriter, final Table table, final char separator,
177             final char quotechar, final LineDelimiter lineDelimiter) throws IOException, TextSerializationException
178     {
179         // Assemble the serializer array
180         TextSerializer<?>[] serializers = new TextSerializer[table.getNumberOfColumns()];
181         for (int i = 0; i < table.getNumberOfColumns(); i++)
182         {
183             Column<?> column = table.getColumns().get(i);
184             serializers[i] = TextSerializer.resolve(column.getValueType());
185         }
186 
187         // Write the data file
188         CsvWriter csvWriter = null;
189         try
190         {
191             csvWriter = CsvWriter.builder().fieldSeparator(separator).quoteCharacter(quotechar).lineDelimiter(lineDelimiter)
192                     .build(writer);
193             csvWriter.writeRow(table.getColumnIds());
194             String[] textFields = new String[table.getNumberOfColumns()];
195             for (Row row : table)
196             {
197                 Object[] values = row.getValues();
198                 for (int i = 0; i < table.getNumberOfColumns(); i++)
199                 {
200                     textFields[i] = TextSerializer.serialize(serializers[i], values[i], table.getColumn(i).getUnit());
201                 }
202                 csvWriter.writeRow(textFields);
203             }
204         }
205         finally
206         {
207             if (closeWriter && csvWriter != null)
208             {
209                 csvWriter.close();
210             }
211         }
212     }
213 
214     /**
215      * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
216      * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
217      * header contains the id, description, and class of the data table itself. The second and further rows contain information
218      * about the columns of the data table. The line ending used will be CRLF which is RFC 4180 compliant.
219      * @param writer Writer; the writer that writes the data, e.g. to a file
220      * @param metaWriter Writer; the writer for the metadata
221      * @param table Table; the data table to write
222      * @throws IOException on I/O error when writing the data
223      * @throws TextSerializationException on unknown data type for serialization
224      */
225     public static void writeData(final Writer writer, final Writer metaWriter, final Table table)
226             throws IOException, TextSerializationException
227     {
228         writeData(writer, metaWriter, table, ',', '"', LineDelimiter.CRLF);
229     }
230 
231     /**
232      * Write the data from the data table in CSV format.
233      * @param filename String; the file name to write the data to
234      * @param metaFilename String; the file name to write the metadata to
235      * @param table Table; the data table to write
236      * @throws IOException on I/O error when writing the data
237      * @throws TextSerializationException on unknown data type for serialization
238      */
239     public static void writeData(final String filename, final String metaFilename, final Table table)
240             throws IOException, TextSerializationException
241     {
242         try (FileWriter fw = new FileWriter(filename); FileWriter mfw = new FileWriter(metaFilename);)
243         {
244             writeData(fw, mfw, table);
245         }
246     }
247 
248     /**
249      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
250      * @param reader Reader; the reader that can read the data, e.g. from a file
251      * @param metaReader Reader; the writer for the metadata
252      * @return Table the data table reconstructed from the meta data and filled with the data
253      * @param separator char; the delimiter to use for separating entries
254      * @param quotechar char; the character to use for quoted elements
255      * @throws IOException when the CSV data was not formatted right
256      * @throws TextSerializationException on unknown data type for serialization
257      */
258     public static Table readData(final Reader reader, final Reader metaReader, final char separator, final char quotechar)
259             throws IOException, TextSerializationException
260     {
261         // Read the metadata file and reconstruct the data table
262         try (NamedCsvReader csvMetaReader =
263                 NamedCsvReader.builder().fieldSeparator(separator).quoteCharacter(quotechar).build(metaReader))
264         {
265             Set<String> metaHeader = csvMetaReader.getHeader();
266             Throw.when(
267                     metaHeader.size() != 4 || !metaHeader.contains("id") || !metaHeader.contains("description")
268                             || !metaHeader.contains("className") || !metaHeader.contains("unit"),
269                     IOException.class,
270                     "header of the metafile does not contain 'id, description, className, unit' as fields, but %s: ",
271                     metaHeader);
272 
273             // table metadata
274             List<Column<?>> columns = new ArrayList<>();
275             Map<String, String> tableRow = new LinkedHashMap<>();
276             Iterator<NamedCsvRow> it = csvMetaReader.iterator();
277             while (it.hasNext())
278             {
279                 NamedCsvRow row = it.next();
280                 // table metadata
281                 if (tableRow.size() == 0)
282                 {
283                     tableRow.putAll(row.getFields());
284                 }
285                 else
286                 {
287                     // column metadata
288                     String type = row.getField("className");
289                     Class<?> valueClass = Primitive.forName(type);
290                     if (valueClass == null)
291                     {
292                         try
293                         {
294                             valueClass = Class.forName(type);
295                         }
296                         catch (ClassNotFoundException exception)
297                         {
298                             throw new IOException("Could not find class " + type, exception);
299                         }
300                     }
301                     Column<?> column =
302                             new Column<>(row.getField("id"), row.getField("description"), valueClass, row.getField("unit"));
303                     columns.add(column);
304                 }
305             }
306 
307             Throw.when(tableRow == null, IOException.class, "no table information in the metafile");
308 
309             // create table
310             Table table;
311             Consumer<Object[]> unserializableTable;
312             if (tableRow.get("className").equals(ListTable.class.getName()))
313             {
314                 ListTable listTable = new ListTable(tableRow.get("id"), tableRow.get("description"), columns);
315                 table = listTable;
316                 unserializableTable = (data) -> listTable.addRow(data);
317             }
318             else
319             {
320                 // fallback
321                 ListTable listTable = new ListTable(tableRow.get("id"), tableRow.get("description"), columns);
322                 table = listTable;
323                 unserializableTable = (data) -> listTable.addRow(data);
324             }
325 
326             // Assemble the serializer array
327             TextSerializer<?>[] serializers = new TextSerializer[table.getNumberOfColumns()];
328             for (int i = 0; i < table.getNumberOfColumns(); i++)
329             {
330                 serializers[i] = TextSerializer.resolve(columns.get(i).getValueType());
331             }
332 
333             // Read the data file
334             try (NamedCsvReader csvReader =
335                     NamedCsvReader.builder().fieldSeparator(separator).quoteCharacter(quotechar).build(reader))
336             {
337                 Set<String> header = csvReader.getHeader();
338                 Throw.when(header.size() != columns.size(), IOException.class,
339                         "Number of columns in the data file does not match column metadata size");
340                 for (int i = 0; i < columns.size(); i++)
341                 {
342                     Throw.when(!header.contains(columns.get(i).getId()), IOException.class,
343                             "Header with id %s not found in the data file", columns.get(i).getId());
344                 }
345 
346                 // Read the data file records
347                 csvReader.forEach(row ->
348                 {
349                     Object[] values = new Object[columns.size()];
350                     for (int i = 0; i < columns.size(); i++)
351                     {
352                         values[i] = TextSerializer.deserialize(serializers[i], row.getField(columns.get(i).getId()),
353                                 columns.get(i));
354                     }
355                     unserializableTable.accept(values); // addRow
356                 });
357                 return table;
358             }
359         }
360     }
361 
362     /**
363      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
364      * @param reader Reader; the reader that can read the data, e.g. from a file
365      * @param metaReader Reader; the writer for the metadata
366      * @return Table the data table reconstructed from the meta data and filled with the data
367      * @throws IOException when the CSV data was not formatted right
368      * @throws TextSerializationException on unknown data type for serialization
369      */
370     public static Table readData(final Reader reader, final Reader metaReader) throws IOException, TextSerializationException
371     {
372         return readData(reader, metaReader, ',', '"');
373     }
374 
375     /**
376      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
377      * @param filename String; the file name to read the data from
378      * @param metaFilename String; the file name to read the metadata from
379      * @return Table the data table reconstructed from the meta data and filled with the data
380      * @throws IOException when the CSV data was not formatted right
381      * @throws TextSerializationException on unknown data type for serialization
382      */
383     public static Table readData(final String filename, final String metaFilename)
384             throws IOException, TextSerializationException
385     {
386         try (FileReader fr = new FileReader(filename); FileReader mfr = new FileReader(metaFilename);)
387         {
388             return readData(fr, mfr);
389         }
390     }
391 
392     /**
393      * Read the data from a CSV-file inside a zip file. The metadata file should be in the same zipfile. Use the metadata to
394      * reconstruct the data table.
395      * @param fileName String; file name of the zip file
396      * @param csvName String; name of the CSV-file, without path
397      * @param metaName String; name of the metadata file, without path
398      * @return Table the data table reconstructed from the meta data and filled with the data
399      * @throws IOException when the CSV data was not formatted right
400      * @throws TextSerializationException on unknown data type for serialization
401      */
402     public static Table readZippedData(final String fileName, final String csvName, final String metaName)
403             throws IOException, TextSerializationException
404     {
405         return readZippedData(fileName, csvName, metaName, ',', '"');
406     }
407 
408     /**
409      * Read the data from a CSV-file inside a zip file. The metadata file should be in the same zipfile. Use the metadata to
410      * reconstruct the data table.
411      * @param fileName String; file name of the zip file
412      * @param csvName String; name of the CSV-file, without path
413      * @param metaName String; name of the metadata file, without path
414      * @param separator char; the delimiter to use for separating entries
415      * @param quotechar char; the character to use for quoted elements
416      * @return Table the data table reconstructed from the meta data and filled with the data
417      * @throws IOException when the CSV data was not formatted right
418      * @throws TextSerializationException on unknown data type for serialization
419      */
420     public static Table readZippedData(final String fileName, final String csvName, final String metaName, final char separator,
421             final char quotechar) throws IOException, TextSerializationException
422     {
423         try (ZipFile zipFile = new ZipFile(fileName))
424         {
425             Reader reader = null;
426             Reader metaReader = null;
427             Iterator<? extends ZipEntry> iterator = zipFile.entries().asIterator();
428             while (iterator.hasNext())
429             {
430                 ZipEntry zipEntry = iterator.next();
431                 if (zipEntry.getName().equals(csvName))
432                 {
433                     reader = new InputStreamReader(zipFile.getInputStream(zipEntry));
434                 }
435                 else if (zipEntry.getName().equals(metaName))
436                 {
437                     metaReader = new InputStreamReader(zipFile.getInputStream(zipEntry));
438                 }
439             }
440             Throw.whenNull(reader, "File %s not found in %s.", csvName, fileName);
441             Throw.whenNull(metaReader, "File %s not found in %s.", metaName, fileName);
442             return readData(reader, metaReader, separator, quotechar);
443         }
444     }
445 
446 }