View Javadoc
1   package org.djutils.data.csv;
2   
3   import java.io.FileReader;
4   import java.io.FileWriter;
5   import java.io.IOException;
6   import java.io.Reader;
7   import java.io.Writer;
8   import java.util.ArrayList;
9   import java.util.Iterator;
10  import java.util.LinkedHashMap;
11  import java.util.List;
12  import java.util.Map;
13  import java.util.Set;
14  
15  import org.djutils.data.DataColumn;
16  import org.djutils.data.DataRecord;
17  import org.djutils.data.DataTable;
18  import org.djutils.data.ListDataTable;
19  import org.djutils.data.SimpleDataColumn;
20  import org.djutils.data.serialization.TextSerializationException;
21  import org.djutils.data.serialization.TextSerializer;
22  import org.djutils.exceptions.Throw;
23  import org.djutils.primitives.Primitive;
24  
25  import de.siegmar.fastcsv.reader.NamedCsvReader;
26  import de.siegmar.fastcsv.reader.NamedCsvRow;
27  import de.siegmar.fastcsv.writer.CsvWriter;
28  import de.siegmar.fastcsv.writer.LineDelimiter;
29  
30  /**
31   * CSVData takes care of reading and writing of table data in CSV format. The class can be used, e.g., as follows:
32   * 
33   * <pre>
34   * DataTable dataTable = new ListDataTable("data", "dataTable", columns);
35   * Writer writer = new FileWriter("c:/data/data.csv");
36   * Writer metaWriter = new FileWriter("c:/data/data.meta.csv");
37   * CSVData.writeData(writer, metaWriter, dataTable);
38   * </pre>
39   * 
40   * Copyright (c) 2020-2022 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
41   * for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
42   * distributed under a three-clause BSD-style license, which can be found at
43   * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>. <br>
44   * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
45   * @author <a href="https://www.tudelft.nl/pknoppers">Peter Knoppers</a>
46   * @author <a href="http://www.transport.citg.tudelft.nl">Wouter Schakel</a>
47   */
48  public final class CSVData
49  {
50      /**
51       * Utility class, no public constructor.
52       */
53      private CSVData()
54      {
55          // utility class
56      }
57  
58      /**
59       * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
60       * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
61       * header contains the id, description, and class of the data table itself. The second and further rows contain information
62       * about the columns of the data table.
63       * @param writer Writer; the writer that writes the data, e.g. to a file
64       * @param metaWriter Writer; the writer for the metadata
65       * @param dataTable DataTable; the data table to write
66       * @param separator char; the delimiter to use for separating entries
67       * @param quotechar char; the character to use for quoted elements
68       * @param lineDelimiter String; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
69       * @throws IOException on I/O error when writing the data
70       * @throws TextSerializationException on unknown data type for serialization
71       */
72      public static void writeData(final Writer writer, final Writer metaWriter, final DataTable dataTable, final char separator,
73              final char quotechar, final LineDelimiter lineDelimiter) throws IOException, TextSerializationException
74      {
75          // Write the metadata file
76          try (CsvWriter csvMetaWriter = CsvWriter.builder().fieldSeparator(separator).quoteCharacter(quotechar)
77                  .lineDelimiter(lineDelimiter).build(metaWriter))
78          {
79              csvMetaWriter.writeRow("id", "description", "className");
80              csvMetaWriter.writeRow(dataTable.getId(), dataTable.getDescription(), dataTable.getClass().getName());
81              for (DataColumn<?> column : dataTable.getColumns())
82              {
83                  csvMetaWriter.writeRow(column.getId(), column.getDescription(), column.getValueType().getName());
84              }
85  
86              // Assemble the serializer array
87              TextSerializer<?>[] serializers = new TextSerializer[dataTable.getNumberOfColumns()];
88              for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
89              {
90                  DataColumn<?> column = dataTable.getColumns().get(i);
91                  serializers[i] = TextSerializer.resolve(column.getValueType());
92              }
93  
94              // Write the data file
95              try (CsvWriter csvWriter = CsvWriter.builder().fieldSeparator(separator).quoteCharacter(quotechar)
96                      .lineDelimiter(lineDelimiter).build(writer))
97              {
98                  csvWriter.writeRow(dataTable.getColumnIds());
99                  String[] textFields = new String[dataTable.getNumberOfColumns()];
100                 for (DataRecord record : dataTable)
101                 {
102                     Object[] values = record.getValues();
103                     for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
104                     {
105                         textFields[i] = serializers[i].serialize(values[i]);
106                     }
107                     csvWriter.writeRow(textFields);
108                 }
109             }
110         }
111     }
112 
113     /**
114      * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
115      * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
116      * header contains the id, description, and class of the data table itself. The second and further rows contain information
117      * about the columns of the data table. The line ending used will be CRLF which is RFC 4180 compliant.
118      * @param writer Writer; the writer that writes the data, e.g. to a file
119      * @param metaWriter Writer; the writer for the metadata
120      * @param dataTable DataTable; the data table to write
121      * @throws IOException on I/O error when writing the data
122      * @throws TextSerializationException on unknown data type for serialization
123      */
124     public static void writeData(final Writer writer, final Writer metaWriter, final DataTable dataTable)
125             throws IOException, TextSerializationException
126     {
127         writeData(writer, metaWriter, dataTable, ',', '"', LineDelimiter.CRLF);
128     }
129 
130     /**
131      * Write the data from the data table in CSV format.
132      * @param filename String; the file name to write the data to
133      * @param metaFilename String; the file name to write the metadata to
134      * @param dataTable DataTable; the data table to write
135      * @throws IOException on I/O error when writing the data
136      * @throws TextSerializationException on unknown data type for serialization
137      */
138     public static void writeData(final String filename, final String metaFilename, final DataTable dataTable)
139             throws IOException, TextSerializationException
140     {
141         FileWriter fw = null;
142         FileWriter mfw = null;
143         try
144         {
145             fw = new FileWriter(filename);
146             mfw = new FileWriter(metaFilename);
147             writeData(fw, mfw, dataTable);
148         }
149         finally
150         {
151             if (null != fw)
152             {
153                 fw.close(); // May have already been closed when the CSV writer was closed, but multiple close is harmless
154             }
155             if (null != mfw)
156             {
157                 mfw.close();
158             }
159         }
160     }
161 
162     /**
163      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
164      * @param reader Reader; the reader that can read the data, e.g. from a file
165      * @param metaReader Reader; the writer for the metadata
166      * @return dataTable the data table reconstructed from the meta data and filled with the data
167      * @param separator char; the delimiter to use for separating entries
168      * @param quotechar char; the character to use for quoted elements
169      * @throws IOException when the CSV data was not formatted right
170      * @throws TextSerializationException on unknown data type for serialization
171      */
172     public static DataTable readData(final Reader reader, final Reader metaReader, final char separator, final char quotechar)
173             throws IOException, TextSerializationException
174     {
175         // Read the metadata file and reconstruct the data table
176         List<DataColumn<?>> columns = new ArrayList<>();
177         try (NamedCsvReader csvMetaReader =
178                 NamedCsvReader.builder().fieldSeparator(separator).quoteCharacter(quotechar).build(metaReader))
179         {
180             Set<String> metaHeader = csvMetaReader.getHeader();
181             Throw.when(
182                     metaHeader.size() != 3 || !metaHeader.contains("id") || !metaHeader.contains("description")
183                             || !metaHeader.contains("className"),
184                     IOException.class,
185                     "header of the metafile does not contain 'id, description, className' as fields, but %s: ", metaHeader);
186 
187             // table metadata
188             Map<String, String> tableRow = new LinkedHashMap<>();
189             Iterator<NamedCsvRow> it = csvMetaReader.iterator();
190             while (it.hasNext())
191             {
192                 NamedCsvRow row = it.next();
193                 // table metadata
194                 if (tableRow.size() == 0)
195                 {
196                     tableRow.putAll(row.getFields());
197                     if (!tableRow.get("className").endsWith("ListDataTable"))
198                     {
199                         throw new IOException("Currently, this method can only recreate a ListDataTable");
200                     }
201                 }
202                 else
203                 {
204                     // column metadata
205                     String type = row.getField("className");
206                     Class<?> valueClass = Primitive.forName(type);
207                     if (valueClass == null)
208                     {
209                         try
210                         {
211                             valueClass = Class.forName(type);
212                         }
213                         catch (ClassNotFoundException exception)
214                         {
215                             throw new IOException("Could not find class " + type, exception);
216                         }
217                     }
218                     @SuppressWarnings({"rawtypes", "unchecked"})
219                     DataColumn<?> column = new SimpleDataColumn(row.getField("id"), row.getField("description"), valueClass);
220                     columns.add(column);
221                 }
222             }
223 
224             Throw.when(tableRow == null, IOException.class, "no table information in the metafile");
225 
226             // create DataTable
227             ListDataTable dataTable = new ListDataTable(tableRow.get("id"), tableRow.get("description"), columns);
228 
229             // Assemble the serializer array
230             TextSerializer<?>[] serializers = new TextSerializer[dataTable.getNumberOfColumns()];
231             for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
232             {
233                 DataColumn<?> column = dataTable.getColumns().get(i);
234                 serializers[i] = TextSerializer.resolve(column.getValueType());
235             }
236 
237             // Read the data file
238             try (NamedCsvReader csvReader =
239                     NamedCsvReader.builder().fieldSeparator(separator).quoteCharacter(quotechar).build(reader))
240             {
241                 Set<String> header = csvReader.getHeader();
242                 Throw.when(header.size() != columns.size(), IOException.class,
243                         "Number of columns in the data file does not match column metadata size");
244                 for (int i = 0; i < columns.size(); i++)
245                 {
246                     Throw.when(!header.contains(columns.get(i).getId()), IOException.class,
247                             "Header with id %s not found in the data file", columns.get(i).getId());
248                 }
249 
250                 // Read the data file records
251                 csvReader.forEach(row ->
252                 {
253                     Object[] values = new Object[columns.size()];
254                     for (int i = 0; i < columns.size(); i++)
255                     {
256                         values[i] = serializers[i].deserialize(row.getField(columns.get(i).getId()));
257                     }
258                     dataTable.addRecord(values);
259                 });
260                 return dataTable;
261             }
262         }
263     }
264 
265     /**
266      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
267      * @param reader Reader; the reader that can read the data, e.g. from a file
268      * @param metaReader Reader; the writer for the metadata
269      * @return dataTable the data table reconstructed from the meta data and filled with the data
270      * @throws IOException when the CSV data was not formatted right
271      * @throws TextSerializationException on unknown data type for serialization
272      */
273     public static DataTable readData(final Reader reader, final Reader metaReader)
274             throws IOException, TextSerializationException
275     {
276         return readData(reader, metaReader, ',', '"');
277     }
278 
279     /**
280      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
281      * @param filename String; the file name to read the data from
282      * @param metaFilename String; the file name to read the metadata from
283      * @return dataTable the data table reconstructed from the meta data and filled with the data
284      * @throws IOException when the CSV data was not formatted right
285      * @throws TextSerializationException on unknown data type for serialization
286      */
287     public static DataTable readData(final String filename, final String metaFilename)
288             throws IOException, TextSerializationException
289     {
290         FileReader fr = null;
291         FileReader mfr = null;
292         try
293         {
294             fr = new FileReader(filename);
295             mfr = new FileReader(metaFilename);
296             return readData(fr, mfr);
297         }
298         finally
299         {
300             if (null != fr)
301             {
302                 fr.close(); // May have already been closed when the CSV reader was closed, but multiple close is harmless
303             }
304             if (null != mfr)
305             {
306                 mfr.close();
307             }
308         }
309     }
310 
311 }