View Javadoc
1   package org.djutils.data.csv;
2   
3   import java.io.FileReader;
4   import java.io.FileWriter;
5   import java.io.IOException;
6   import java.io.Reader;
7   import java.io.Writer;
8   import java.util.ArrayList;
9   import java.util.List;
10  
11  import org.djutils.data.DataColumn;
12  import org.djutils.data.DataRecord;
13  import org.djutils.data.DataTable;
14  import org.djutils.data.ListDataTable;
15  import org.djutils.data.SimpleDataColumn;
16  import org.djutils.data.serialization.TextSerializationException;
17  import org.djutils.data.serialization.TextSerializer;
18  import org.djutils.exceptions.Throw;
19  import org.djutils.primitives.Primitive;
20  
21  import de.siegmar.fastcsv.reader.CsvParser;
22  import de.siegmar.fastcsv.reader.CsvReader;
23  import de.siegmar.fastcsv.reader.CsvRow;
24  import de.siegmar.fastcsv.writer.CsvAppender;
25  import de.siegmar.fastcsv.writer.CsvWriter;
26  
27  /**
28   * CSVData takes care of reading and writing of table data in CSV format. The class can be used, e.g., as follows:
29   * 
30   * <pre>
31   * DataTable dataTable = new ListDataTable("data", "dataTable", columns);
32   * Writer writer = new FileWriter("c:/data/data.csv");
33   * Writer metaWriter = new FileWriter("c:/data/data.meta.csv");
34   * CSVData.writeData(writer, metaWriter, dataTable);
35   * </pre>
36   * 
37   * Copyright (c) 2020-2020 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
38   * for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
39   * distributed under a three-clause BSD-style license, which can be found at
40   * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>. <br>
41   * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
42   * @author <a href="https://www.tudelft.nl/pknoppers">Peter Knoppers</a>
43   * @author <a href="http://www.transport.citg.tudelft.nl">Wouter Schakel</a>
44   */
45  public final class CSVData
46  {
47      /**
48       * Utility class, no public constructor.
49       */
50      private CSVData()
51      {
52          // utility class
53      }
54  
55      /**
56       * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
57       * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
58       * header contains the id, description, and class of the data table itself. The second and further rows contain information
59       * about the columns of the data table.
60       * @param writer Writer; the writer that writes the data, e.g. to a file
61       * @param metaWriter Writer; the writer for the metadata
62   * @param dataTable DataTable; the data table to write
63  
64       * @param separator char; the delimiter to use for separating entries
65       * @param quotechar char; the character to use for quoted elements
66       * @param escapechar char; the character to use for escaping quotechars or escapechars
67       * @param lineEnd String; the line feed terminator to use
68       * @throws IOException on I/O error when writing the data
69       * @throws TextSerializationException on unknown data type for serialization
70       */
71      public static void writeData(final Writer writer, final Writer metaWriter, final DataTable dataTable, final char separator,
72              final char quotechar, final char escapechar, final String lineEnd) throws IOException, TextSerializationException
73      {
74          // Write the metadata file
75          CsvWriter csvMetaWriter = null;
76          CsvWriter csvWriter = null;
77          csvMetaWriter = new CsvWriter();
78          csvMetaWriter.setFieldSeparator(separator);
79          csvMetaWriter.setTextDelimiter(quotechar);
80          csvMetaWriter.setLineDelimiter(lineEnd.toCharArray());
81          try (CsvAppender csvMetaAppender = csvMetaWriter.append(metaWriter))
82          {
83              csvMetaAppender.appendLine("id", "description", "className");
84              csvMetaAppender.appendLine(dataTable.getId(), dataTable.getDescription(), dataTable.getClass().getName());
85              for (DataColumn<?> column : dataTable.getColumns())
86              {
87                  csvMetaAppender.appendLine(column.getId(), column.getDescription(), column.getValueType().getName());
88              }
89  
90              // Assemble the serializer array
91              TextSerializer<?>[] serializers = new TextSerializer[dataTable.getNumberOfColumns()];
92              for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
93              {
94                  DataColumn<?> column = dataTable.getColumns().get(i);
95                  serializers[i] = TextSerializer.resolve(column.getValueType());
96              }
97  
98              // Write the data file
99              csvWriter = new CsvWriter();
100             csvWriter.setFieldSeparator(separator);
101             csvWriter.setTextDelimiter(quotechar);
102             csvWriter.setLineDelimiter(lineEnd.toCharArray());
103             try (CsvAppender csvAppender = csvWriter.append(writer))
104             {
105                 csvAppender.appendLine(dataTable.getColumnIds());
106                 String[] textFields = new String[dataTable.getNumberOfColumns()];
107                 for (DataRecord record : dataTable)
108                 {
109                     Object[] values = record.getValues();
110                     for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
111                     {
112                         textFields[i] = serializers[i].serialize(values[i]);
113                     }
114                     csvAppender.appendLine(textFields);
115                 }
116             }
117         }
118     }
119 
120     /**
121      * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
122      * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
123      * header contains the id, description, and class of the data table itself. The second and further rows contain information
124      * about the columns of the data table.
125      * @param writer Writer; the writer that writes the data, e.g. to a file
126      * @param metaWriter Writer; the writer for the metadata
127  * @param dataTable DataTable; the data table to write
128 
129      * @throws IOException on I/O error when writing the data
130      * @throws TextSerializationException on unknown data type for serialization
131      */
132     public static void writeData(final Writer writer, final Writer metaWriter, final DataTable dataTable)
133             throws IOException, TextSerializationException
134     {
135         writeData(writer, metaWriter, dataTable, ',', '"', '\\', "\n");
136     }
137 
138     /**
139      * Write the data from the data table in CSV format.
140      * @param filename String; the file name to write the data to
141      * @param metaFilename String; the file name to write the metadata to
142  * @param dataTable DataTable; the data table to write
143 
144      * @throws IOException on I/O error when writing the data
145      * @throws TextSerializationException on unknown data type for serialization
146      */
147     public static void writeData(final String filename, final String metaFilename, final DataTable dataTable)
148             throws IOException, TextSerializationException
149     {
150         FileWriter fw = null;
151         FileWriter mfw = null;
152         try
153         {
154             fw = new FileWriter(filename);
155             mfw = new FileWriter(metaFilename);
156             writeData(fw, mfw, dataTable);
157         }
158         finally
159         {
160             if (null != fw)
161             {
162                 fw.close(); // May have already been closed when the CSV writer was closed, but multiple close is harmless
163             }
164             if (null != mfw)
165             {
166                 mfw.close();
167             }
168         }
169     }
170 
171     /**
172      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
173      * @param reader Reader; the reader that can read the data, e.g. from a file
174      * @param metaReader Reader; the writer for the metadata
175      * @return dataTable the data table reconstructed from the meta data and filled with the data
176      * @param separator char; the delimiter to use for separating entries
177      * @param quotechar char; the character to use for quoted elements
178      * @param escapechar char; the character to use for escaping quotechars or escapechars
179      * @param lineEnd String; the line feed terminator to use
180      * @throws IOException when the CSV data was not formatted right
181      * @throws TextSerializationException on unknown data type for serialization
182      */
183     public static DataTable readData(final Reader reader, final Reader metaReader, final char separator, final char quotechar,
184             final char escapechar, final String lineEnd) throws IOException, TextSerializationException
185     {
186         // Read the metadata file and reconstruct the data table
187         CsvReader csvMetaReader = new CsvReader();
188         csvMetaReader.setFieldSeparator(separator);
189         csvMetaReader.setTextDelimiter(quotechar);
190         List<DataColumn<?>> columns = new ArrayList<>();
191         try (CsvParser csvMetaParser = csvMetaReader.parse(metaReader))
192         {
193             CsvRow row = csvMetaParser.nextRow();
194             Throw.when(row == null, IOException.class, "metafile does not contain header row");
195             List<String> header = row.getFields();
196             Throw.when(
197                     header.size() != 3 || !"id".equals(header.get(0)) || !"description".equals(header.get(1))
198                             || !"className".equals(header.get(2)),
199                     IOException.class,
200                     "header of the metafile does not contain 'id, description, className' as fields, but %s: ", header);
201 
202             // table metadata
203             row = csvMetaParser.nextRow();
204             Throw.when(row == null, IOException.class, "no table information in the metafile");
205             List<String> tableLine = row.getFields();
206             Throw.when(tableLine.size() != 3, IOException.class, "table data in the metafile does not contain 3 fields");
207             Throw.when(!tableLine.get(2).endsWith("ListDataTable"), IOException.class,
208                     "Currently, this method can only recreate a ListDataTable");
209 
210             // column metadata
211             while ((row = csvMetaParser.nextRow()) != null)
212             {
213                 List<String> line = row.getFields();
214                 Throw.when(line.size() != 3, IOException.class, "column data in the metafile does not contain 3 fields");
215                 String type = line.get(2);
216                 Class<?> valueClass = Primitive.forName(type);
217                 if (valueClass == null)
218                 {
219                     try
220                     {
221                         valueClass = Class.forName(type);
222                     }
223                     catch (ClassNotFoundException exception)
224                     {
225                         throw new IOException("Could not find class " + type, exception);
226                     }
227                 }
228                 @SuppressWarnings({"rawtypes", "unchecked"})
229                 DataColumn<?> column = new SimpleDataColumn(line.get(0), line.get(1), valueClass);
230                 columns.add(column);
231             }
232 
233             // create DataTable
234             ListDataTablestDataTable">ListDataTable dataTable = new ListDataTable(tableLine.get(0), tableLine.get(1), columns);
235 
236             // Assemble the serializer array
237             TextSerializer<?>[] serializers = new TextSerializer[dataTable.getNumberOfColumns()];
238             for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
239             {
240                 DataColumn<?> column = dataTable.getColumns().get(i);
241                 serializers[i] = TextSerializer.resolve(column.getValueType());
242             }
243 
244             // Read the data file header
245             CsvReader csvReader = new CsvReader();
246             csvReader.setFieldSeparator(separator);
247             csvReader.setTextDelimiter(quotechar);
248             try (CsvParser csvParser = csvReader.parse(reader))
249             {
250                 row = csvParser.nextRow();
251                 Throw.when(row == null, IOException.class, "file does not contain header row");
252                 header = row.getFields();
253                 Throw.when(header.size() != columns.size(), IOException.class,
254                         "Number of columns in the data file does not match column metadata size");
255                 for (int i = 0; i < header.size(); i++)
256                 {
257                     Throw.when(!header.get(i).equals(columns.get(i).getId()), IOException.class,
258                             "Header for column %d in the data file does not match column metadata info", i);
259                 }
260 
261                 // Read the data file records
262                 List<String> data;
263                 while ((row = csvParser.nextRow()) != null)
264                 {
265                     data = row.getFields();
266                     Object[] values = new Object[columns.size()];
267                     for (int i = 0; i < values.length; i++)
268                     {
269                         values[i] = serializers[i].deserialize(data.get(i));
270                     }
271                     dataTable.addRecord(values);
272                 }
273                 return dataTable;
274             }
275         }
276     }
277 
278     /**
279      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
280      * @param reader Reader; the reader that can read the data, e.g. from a file
281      * @param metaReader Reader; the writer for the metadata
282      * @return dataTable the data table reconstructed from the meta data and filled with the data
283      * @throws IOException when the CSV data was not formatted right
284      * @throws TextSerializationException on unknown data type for serialization
285      */
286     public static DataTable readData(final Reader reader, final Reader metaReader)
287             throws IOException, TextSerializationException
288     {
289         return readData(reader, metaReader, ',', '"', '\\', "\n");
290     }
291 
292     /**
293      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
294      * @param filename String; the file name to read the data from
295      * @param metaFilename String; the file name to read the metadata from
296      * @return dataTable the data table reconstructed from the meta data and filled with the data
297      * @throws IOException when the CSV data was not formatted right
298      * @throws TextSerializationException on unknown data type for serialization
299      */
300     public static DataTable readData(final String filename, final String metaFilename)
301             throws IOException, TextSerializationException
302     {
303         FileReader fr = null;
304         FileReader mfr = null;
305         try
306         {
307             fr = new FileReader(filename);
308             mfr = new FileReader(metaFilename);
309             return readData(fr, mfr);
310         }
311         finally
312         {
313             if (null != fr)
314             {
315                 fr.close(); // May have already been closed when the CSV reader was closed, but multiple close is harmless
316             }
317             if (null != mfr)
318             {
319                 mfr.close();
320             }
321         }
322     }
323 
324 }