View Javadoc
1   package org.djutils.data.csv;
2   
3   import java.io.FileReader;
4   import java.io.FileWriter;
5   import java.io.IOException;
6   import java.io.Reader;
7   import java.io.Writer;
8   import java.util.ArrayList;
9   import java.util.List;
10  
11  import org.djutils.data.DataColumn;
12  import org.djutils.data.DataRecord;
13  import org.djutils.data.DataTable;
14  import org.djutils.data.ListDataTable;
15  import org.djutils.data.SimpleDataColumn;
16  import org.djutils.data.serialization.TextSerializationException;
17  import org.djutils.data.serialization.TextSerializer;
18  import org.djutils.exceptions.Throw;
19  import org.djutils.primitives.Primitive;
20  
21  import com.opencsv.CSVParserBuilder;
22  import com.opencsv.CSVReader;
23  import com.opencsv.CSVReaderBuilder;
24  import com.opencsv.CSVWriter;
25  import com.opencsv.ICSVWriter;
26  import com.opencsv.exceptions.CsvValidationException;
27  
/**
 * CSVData takes care of reading and writing of table data in CSV format. The class can be used, e.g., as follows:
 * 
 * <pre>
 * DataTable dataTable = new ListDataTable("data", "dataTable", columns);
 * Writer writer = new FileWriter("c:/data/data.csv");
 * Writer metaWriter = new FileWriter("c:/data/data.meta.csv");
 * CSVData.writeData(writer, metaWriter, dataTable);
 * </pre>
 * 
 * Copyright (c) 2020-2020 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved.
 * See <a href="https://djutils.org" target="_blank"> https://djutils.org</a> for project information. The DJUTILS project
 * is distributed under a three-clause BSD-style license, which can be found at
 * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>. <br>
 * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
 * @author <a href="https://www.tudelft.nl/pknoppers">Peter Knoppers</a>
 * @author <a href="http://www.transport.citg.tudelft.nl">Wouter Schakel</a>
 */
46  public final class CSVData
47  {
48      /**
49       * Utility class, no public constructor.
50       */
51      private CSVData()
52      {
53          // utility class
54      }
55  
56      /**
57       * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
58       * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
59       * header contains the id, description, and class of the data table itself. The second and further rows contain information
60       * about the columns of the data table.
61       * @param writer Writer; the writer that writes the data, e.g. to a file
62       * @param metaWriter Writer; the writer for the metadata
63       * @param dataTable the data table to write
64       * @param separator char; the delimiter to use for separating entries
65       * @param quotechar char; the character to use for quoted elements
66       * @param escapechar char; the character to use for escaping quotechars or escapechars
67       * @param lineEnd String; the line feed terminator to use
68       * @throws IOException on I/O error when writing the data
69       * @throws TextSerializationException on unknown data type for serialization
70       */
71      public static void writeData(final Writer writer, final Writer metaWriter, final DataTable dataTable, final char separator,
72              final char quotechar, final char escapechar, final String lineEnd) throws IOException, TextSerializationException
73      {
74          // Write the metadata file
75          CSVWriter csvMetaWriter = null;
76          CSVWriter csvWriter = null;
77          try
78          {
79              csvMetaWriter = new CSVWriter(metaWriter, separator, quotechar, escapechar, lineEnd);
80              csvMetaWriter.writeNext(new String[] { "id", "description", "className" });
81              csvMetaWriter
82                      .writeNext(new String[] { dataTable.getId(), dataTable.getDescription(), dataTable.getClass().getName() });
83              for (DataColumn<?> column : dataTable.getColumns())
84              {
85                  csvMetaWriter
86                          .writeNext(new String[] { column.getId(), column.getDescription(), column.getValueType().getName() });
87              }
88  
89              // Assemble the serializer array
90              TextSerializer<?>[] serializers = new TextSerializer[dataTable.getNumberOfColumns()];
91              for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
92              {
93                  DataColumn<?> column = dataTable.getColumns().get(i);
94                  serializers[i] = TextSerializer.resolve(column.getValueType());
95              }
96  
97              // Write the data file
98              csvWriter = new CSVWriter(writer, separator, quotechar, escapechar, lineEnd);
99              csvWriter.writeNext(dataTable.getColumnIds());
100             String[] textFields = new String[dataTable.getNumberOfColumns()];
101             for (DataRecord record : dataTable)
102             {
103                 Object[] values = record.getValues();
104                 for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
105                 {
106                     textFields[i] = serializers[i].serialize(values[i]);
107                 }
108                 csvWriter.writeNext(textFields);
109             }
110         }
111         finally
112         {
113             if (null != csvMetaWriter)
114             {
115                 csvMetaWriter.close();
116             }
117             if (null != csvWriter)
118             {
119                 csvWriter.close();
120             }
121         }
122     }
123 
124     /**
125      * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
126      * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
127      * header contains the id, description, and class of the data table itself. The second and further rows contain information
128      * about the columns of the data table.
129      * @param writer Writer; the writer that writes the data, e.g. to a file
130      * @param metaWriter Writer; the writer for the metadata
131      * @param dataTable the data table to write
132      * @throws IOException on I/O error when writing the data
133      * @throws TextSerializationException on unknown data type for serialization
134      */
135     public static void writeData(final Writer writer, final Writer metaWriter, final DataTable dataTable)
136             throws IOException, TextSerializationException
137     {
138         writeData(writer, metaWriter, dataTable, ICSVWriter.DEFAULT_SEPARATOR, ICSVWriter.DEFAULT_QUOTE_CHARACTER, '\\',
139                 ICSVWriter.DEFAULT_LINE_END);
140     }
141 
142     /**
143      * Write the data from the data table in CSV format.
144      * @param filename String; the file name to write the data to
145      * @param metaFilename String; the file name to write the metadata to
146      * @param dataTable the data table to write
147      * @throws IOException on I/O error when writing the data
148      * @throws TextSerializationException on unknown data type for serialization
149      */
150     public static void writeData(final String filename, final String metaFilename, final DataTable dataTable)
151             throws IOException, TextSerializationException
152     {
153         FileWriter fw = null;
154         FileWriter mfw = null;
155         try
156         {
157             fw = new FileWriter(filename);
158             mfw = new FileWriter(metaFilename);
159             writeData(fw, mfw, dataTable);
160         }
161         finally
162         {
163             if (null != fw)
164             {
165                 fw.close(); // May have already been closed when the CSV writer was closed, but multiple close is harmless
166             }
167             if (null != mfw)
168             {
169                 mfw.close();
170             }
171         }
172     }
173 
174     /**
175      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
176      * @param reader Reader; the reader that can read the data, e.g. from a file
177      * @param metaReader Reader; the writer for the metadata
178      * @return dataTable the data table reconstructed from the meta data and filled with the data
179      * @param separator char; the delimiter to use for separating entries
180      * @param quotechar char; the character to use for quoted elements
181      * @param escapechar char; the character to use for escaping quotechars or escapechars
182      * @param lineEnd String; the line feed terminator to use
183      * @throws IOException on I/O error when reading the data
184      * @throws CsvValidationException when the CSV data was not formatted right
185      * @throws TextSerializationException on unknown data type for serialization
186      */
187     public static DataTable readData(final Reader reader, final Reader metaReader, final char separator, final char quotechar,
188             final char escapechar, final String lineEnd) throws IOException, CsvValidationException, TextSerializationException
189     {
190         CSVReader csvMetaReader = null;
191         CSVReader csvReader = null;
192         try
193         {
194             // Read the metadata file and reconstruct the data table
195             csvMetaReader = new CSVReaderBuilder(metaReader).withCSVParser(
196                     new CSVParserBuilder().withSeparator(separator).withQuoteChar(quotechar).withEscapeChar(escapechar).build())
197                     .build();
198             List<DataColumn<?>> columns = new ArrayList<>();
199             String[] header = csvMetaReader.readNext();
200             Throw.when(
201                     header.length != 3 || !"id".equals(header[0]) || !"description".equals(header[1])
202                             || !"className".equals(header[2]),
203                     IOException.class, "header of the metafile does not contain 'id, description, className' as fields");
204 
205             // table metadata
206             String[] tableLine = csvMetaReader.readNext();
207             Throw.when(tableLine == null, IOException.class, "no table information in the metafile");
208             Throw.when(tableLine.length != 3, IOException.class, "table data in the metafile does not contain 3 fields");
209             Throw.when(!tableLine[2].endsWith("ListDataTable"), IOException.class,
210                     "Currently, this method can only recreate a ListDataTable");
211 
212             // column metadata
213             String[] line = csvMetaReader.readNext();
214             while (line != null)
215             {
216                 Throw.when(line.length != 3, IOException.class, "column data in the metafile does not contain 3 fields");
217                 String type = line[2];
218                 Class<?> valueClass = Primitive.forName(type);
219                 if (valueClass == null)
220                 {
221                     try
222                     {
223                         valueClass = Class.forName(type);
224                     }
225                     catch (ClassNotFoundException exception)
226                     {
227                         throw new IOException("Could not find class " + type, exception);
228                     }
229                 }
230                 @SuppressWarnings({ "rawtypes", "unchecked" })
231                 DataColumn<?> column = new SimpleDataColumn(line[0], line[1], valueClass);
232                 columns.add(column);
233                 line = csvMetaReader.readNext();
234             }
235 
236             // create DataTable
237             ListDataTablestDataTable">ListDataTable dataTable = new ListDataTable(tableLine[0], tableLine[1], columns);
238 
239             // Assemble the serializer array
240             TextSerializer<?>[] serializers = new TextSerializer[dataTable.getNumberOfColumns()];
241             for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
242             {
243                 DataColumn<?> column = dataTable.getColumns().get(i);
244                 serializers[i] = TextSerializer.resolve(column.getValueType());
245             }
246 
247             // Read the data file header
248             csvReader = new CSVReaderBuilder(reader).withCSVParser(
249                     new CSVParserBuilder().withSeparator(separator).withQuoteChar(quotechar).withEscapeChar(escapechar).build())
250                     .build();
251             header = csvReader.readNext();
252             Throw.when(header.length != columns.size(), IOException.class,
253                     "Number of columns in the data file does not match column metadata size");
254             for (int i = 0; i < header.length; i++)
255             {
256                 Throw.when(!header[i].equals(columns.get(i).getId()), IOException.class,
257                         "Header for column %d in the data file does not match column metadata info", i);
258             }
259 
260             // Read the data file records
261             String[] data = csvReader.readNext();
262             while (data != null)
263             {
264                 Object[] values = new Object[columns.size()];
265                 for (int i = 0; i < values.length; i++)
266                 {
267                     values[i] = serializers[i].deserialize(data[i]);
268                 }
269                 dataTable.addRecord(values);
270                 data = csvReader.readNext();
271             }
272             return dataTable;
273         }
274         finally
275         {
276             if (null != csvMetaReader)
277             {
278                 csvMetaReader.close();
279             }
280             if (null != csvReader)
281             {
282                 csvReader.close();
283             }
284         }
285     }
286 
287     /**
288      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
289      * @param reader Reader; the reader that can read the data, e.g. from a file
290      * @param metaReader Reader; the writer for the metadata
291      * @return dataTable the data table reconstructed from the meta data and filled with the data
292      * @throws IOException on I/O error when reading the data
293      * @throws CsvValidationException when the CSV data was not formatted right
294      * @throws TextSerializationException on unknown data type for serialization
295      */
296     public static DataTable readData(final Reader reader, final Reader metaReader)
297             throws IOException, CsvValidationException, TextSerializationException
298     {
299         return readData(reader, metaReader, ICSVWriter.DEFAULT_SEPARATOR, ICSVWriter.DEFAULT_QUOTE_CHARACTER, '\\',
300                 ICSVWriter.DEFAULT_LINE_END);
301     }
302 
303     /**
304      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
305      * @param filename String; the file name to read the data from
306      * @param metaFilename String; the file name to read the metadata from
307      * @return dataTable the data table reconstructed from the meta data and filled with the data
308      * @throws IOException on I/O error when reading the data
309      * @throws CsvValidationException when the CSV data was not formatted right
310      * @throws TextSerializationException on unknown data type for serialization
311      */
312     public static DataTable readData(final String filename, final String metaFilename)
313             throws IOException, CsvValidationException, TextSerializationException
314     {
315         FileReader fr = null;
316         FileReader mfr = null;
317         try
318         {
319             fr = new FileReader(filename);
320             mfr = new FileReader(metaFilename);
321             return readData(fr, mfr);
322         }
323         finally
324         {
325             if (null != fr)
326             {
327                 fr.close(); // May have already been closed when the CSV reader was closed, but multiple close is harmless
328             }
329             if (null != mfr)
330             {
331                 mfr.close();
332             }
333         }
334     }
335 
336 }