View Javadoc
1   package org.djutils.data.csv;
2   
3   import java.io.FileReader;
4   import java.io.FileWriter;
5   import java.io.IOException;
6   import java.io.Reader;
7   import java.io.Writer;
8   import java.util.ArrayList;
9   import java.util.List;
10  
11  import org.djutils.data.DataColumn;
12  import org.djutils.data.DataRecord;
13  import org.djutils.data.DataTable;
14  import org.djutils.data.ListDataTable;
15  import org.djutils.data.SimpleDataColumn;
16  import org.djutils.data.serialization.TextSerializationException;
17  import org.djutils.data.serialization.TextSerializer;
18  import org.djutils.exceptions.Throw;
19  import org.djutils.primitives.Primitive;
20  
21  import de.siegmar.fastcsv.reader.CsvParser;
22  import de.siegmar.fastcsv.reader.CsvReader;
23  import de.siegmar.fastcsv.reader.CsvRow;
24  import de.siegmar.fastcsv.writer.CsvAppender;
25  import de.siegmar.fastcsv.writer.CsvWriter;
26  
27  /**
28   * CSVData takes care of reading and writing of table data in CSV format. The class can be used, e.g., as follows:
29   * 
30   * <pre>
31   * DataTable dataTable = new ListDataTable("data", "dataTable", columns);
32   * Writer writer = new FileWriter("c:/data/data.csv");
33   * Writer metaWriter = new FileWriter("c:/data/data.meta.csv");
34   * CSVData.writeData(writer, metaWriter, dataTable);
35   * </pre>
36   * 
37   * Copyright (c) 2020-2021 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
38   * for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
39   * distributed under a three-clause BSD-style license, which can be found at
40   * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>. <br>
41   * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
42   * @author <a href="https://www.tudelft.nl/pknoppers">Peter Knoppers</a>
43   * @author <a href="http://www.transport.citg.tudelft.nl">Wouter Schakel</a>
44   */
45  public final class CSVData
46  {
47      /**
48       * Utility class, no public constructor.
49       */
50      private CSVData()
51      {
52          // utility class
53      }
54  
55      /**
56       * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
57       * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
58       * header contains the id, description, and class of the data table itself. The second and further rows contain information
59       * about the columns of the data table.
60       * @param writer Writer; the writer that writes the data, e.g. to a file
61       * @param metaWriter Writer; the writer for the metadata
62       * @param dataTable DataTable; the data table to write
63       * @param separator char; the delimiter to use for separating entries
64       * @param quotechar char; the character to use for quoted elements
65       * @param escapechar char; the character to use for escaping quotechars or escapechars
66       * @param lineEnd String; the line feed terminator to use
67       * @throws IOException on I/O error when writing the data
68       * @throws TextSerializationException on unknown data type for serialization
69       */
70      public static void writeData(final Writer writer, final Writer metaWriter, final DataTable dataTable, final char separator,
71              final char quotechar, final char escapechar, final String lineEnd) throws IOException, TextSerializationException
72      {
73          // Write the metadata file
74          CsvWriter csvMetaWriter = null;
75          CsvWriter csvWriter = null;
76          csvMetaWriter = new CsvWriter();
77          csvMetaWriter.setFieldSeparator(separator);
78          csvMetaWriter.setTextDelimiter(quotechar);
79          csvMetaWriter.setLineDelimiter(lineEnd.toCharArray());
80          try (CsvAppender csvMetaAppender = csvMetaWriter.append(metaWriter))
81          {
82              csvMetaAppender.appendLine("id", "description", "className");
83              csvMetaAppender.appendLine(dataTable.getId(), dataTable.getDescription(), dataTable.getClass().getName());
84              for (DataColumn<?> column : dataTable.getColumns())
85              {
86                  csvMetaAppender.appendLine(column.getId(), column.getDescription(), column.getValueType().getName());
87              }
88  
89              // Assemble the serializer array
90              TextSerializer<?>[] serializers = new TextSerializer[dataTable.getNumberOfColumns()];
91              for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
92              {
93                  DataColumn<?> column = dataTable.getColumns().get(i);
94                  serializers[i] = TextSerializer.resolve(column.getValueType());
95              }
96  
97              // Write the data file
98              csvWriter = new CsvWriter();
99              csvWriter.setFieldSeparator(separator);
100             csvWriter.setTextDelimiter(quotechar);
101             csvWriter.setLineDelimiter(lineEnd.toCharArray());
102             try (CsvAppender csvAppender = csvWriter.append(writer))
103             {
104                 csvAppender.appendLine(dataTable.getColumnIds());
105                 String[] textFields = new String[dataTable.getNumberOfColumns()];
106                 for (DataRecord record : dataTable)
107                 {
108                     Object[] values = record.getValues();
109                     for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
110                     {
111                         textFields[i] = serializers[i].serialize(values[i]);
112                     }
113                     csvAppender.appendLine(textFields);
114                 }
115             }
116         }
117     }
118 
119     /**
120      * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
121      * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
122      * header contains the id, description, and class of the data table itself. The second and further rows contain information
123      * about the columns of the data table.
124      * @param writer Writer; the writer that writes the data, e.g. to a file
125      * @param metaWriter Writer; the writer for the metadata
126      * @param dataTable DataTable; the data table to write
127      * @throws IOException on I/O error when writing the data
128      * @throws TextSerializationException on unknown data type for serialization
129      */
130     public static void writeData(final Writer writer, final Writer metaWriter, final DataTable dataTable)
131             throws IOException, TextSerializationException
132     {
133         writeData(writer, metaWriter, dataTable, ',', '"', '\\', "\n");
134     }
135 
136     /**
137      * Write the data from the data table in CSV format.
138      * @param filename String; the file name to write the data to
139      * @param metaFilename String; the file name to write the metadata to
140      * @param dataTable DataTable; the data table to write
141      * @throws IOException on I/O error when writing the data
142      * @throws TextSerializationException on unknown data type for serialization
143      */
144     public static void writeData(final String filename, final String metaFilename, final DataTable dataTable)
145             throws IOException, TextSerializationException
146     {
147         FileWriter fw = null;
148         FileWriter mfw = null;
149         try
150         {
151             fw = new FileWriter(filename);
152             mfw = new FileWriter(metaFilename);
153             writeData(fw, mfw, dataTable);
154         }
155         finally
156         {
157             if (null != fw)
158             {
159                 fw.close(); // May have already been closed when the CSV writer was closed, but multiple close is harmless
160             }
161             if (null != mfw)
162             {
163                 mfw.close();
164             }
165         }
166     }
167 
168     /**
169      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
170      * @param reader Reader; the reader that can read the data, e.g. from a file
171      * @param metaReader Reader; the writer for the metadata
172      * @return dataTable the data table reconstructed from the meta data and filled with the data
173      * @param separator char; the delimiter to use for separating entries
174      * @param quotechar char; the character to use for quoted elements
175      * @param escapechar char; the character to use for escaping quotechars or escapechars
176      * @param lineEnd String; the line feed terminator to use
177      * @throws IOException when the CSV data was not formatted right
178      * @throws TextSerializationException on unknown data type for serialization
179      */
180     public static DataTable readData(final Reader reader, final Reader metaReader, final char separator, final char quotechar,
181             final char escapechar, final String lineEnd) throws IOException, TextSerializationException
182     {
183         // Read the metadata file and reconstruct the data table
184         CsvReader csvMetaReader = new CsvReader();
185         csvMetaReader.setFieldSeparator(separator);
186         csvMetaReader.setTextDelimiter(quotechar);
187         List<DataColumn<?>> columns = new ArrayList<>();
188         try (CsvParser csvMetaParser = csvMetaReader.parse(metaReader))
189         {
190             CsvRow row = csvMetaParser.nextRow();
191             Throw.when(row == null, IOException.class, "metafile does not contain header row");
192             List<String> header = row.getFields();
193             Throw.when(
194                     header.size() != 3 || !"id".equals(header.get(0)) || !"description".equals(header.get(1))
195                             || !"className".equals(header.get(2)),
196                     IOException.class,
197                     "header of the metafile does not contain 'id, description, className' as fields, but %s: ", header);
198 
199             // table metadata
200             row = csvMetaParser.nextRow();
201             Throw.when(row == null, IOException.class, "no table information in the metafile");
202             List<String> tableLine = row.getFields();
203             Throw.when(tableLine.size() != 3, IOException.class, "table data in the metafile does not contain 3 fields");
204             Throw.when(!tableLine.get(2).endsWith("ListDataTable"), IOException.class,
205                     "Currently, this method can only recreate a ListDataTable");
206 
207             // column metadata
208             while ((row = csvMetaParser.nextRow()) != null)
209             {
210                 List<String> line = row.getFields();
211                 Throw.when(line.size() != 3, IOException.class, "column data in the metafile does not contain 3 fields");
212                 String type = line.get(2);
213                 Class<?> valueClass = Primitive.forName(type);
214                 if (valueClass == null)
215                 {
216                     try
217                     {
218                         valueClass = Class.forName(type);
219                     }
220                     catch (ClassNotFoundException exception)
221                     {
222                         throw new IOException("Could not find class " + type, exception);
223                     }
224                 }
225                 @SuppressWarnings({"rawtypes", "unchecked"})
226                 DataColumn<?> column = new SimpleDataColumn(line.get(0), line.get(1), valueClass);
227                 columns.add(column);
228             }
229 
230             // create DataTable
231             ListDataTablestDataTable">ListDataTable dataTable = new ListDataTable(tableLine.get(0), tableLine.get(1), columns);
232 
233             // Assemble the serializer array
234             TextSerializer<?>[] serializers = new TextSerializer[dataTable.getNumberOfColumns()];
235             for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
236             {
237                 DataColumn<?> column = dataTable.getColumns().get(i);
238                 serializers[i] = TextSerializer.resolve(column.getValueType());
239             }
240 
241             // Read the data file header
242             CsvReader csvReader = new CsvReader();
243             csvReader.setFieldSeparator(separator);
244             csvReader.setTextDelimiter(quotechar);
245             try (CsvParser csvParser = csvReader.parse(reader))
246             {
247                 row = csvParser.nextRow();
248                 Throw.when(row == null, IOException.class, "file does not contain header row");
249                 header = row.getFields();
250                 Throw.when(header.size() != columns.size(), IOException.class,
251                         "Number of columns in the data file does not match column metadata size");
252                 for (int i = 0; i < header.size(); i++)
253                 {
254                     Throw.when(!header.get(i).equals(columns.get(i).getId()), IOException.class,
255                             "Header for column %d in the data file does not match column metadata info", i);
256                 }
257 
258                 // Read the data file records
259                 List<String> data;
260                 while ((row = csvParser.nextRow()) != null)
261                 {
262                     data = row.getFields();
263                     Object[] values = new Object[columns.size()];
264                     for (int i = 0; i < values.length; i++)
265                     {
266                         values[i] = serializers[i].deserialize(data.get(i));
267                     }
268                     dataTable.addRecord(values);
269                 }
270                 return dataTable;
271             }
272         }
273     }
274 
275     /**
276      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
277      * @param reader Reader; the reader that can read the data, e.g. from a file
278      * @param metaReader Reader; the writer for the metadata
279      * @return dataTable the data table reconstructed from the meta data and filled with the data
280      * @throws IOException when the CSV data was not formatted right
281      * @throws TextSerializationException on unknown data type for serialization
282      */
283     public static DataTable readData(final Reader reader, final Reader metaReader)
284             throws IOException, TextSerializationException
285     {
286         return readData(reader, metaReader, ',', '"', '\\', "\n");
287     }
288 
289     /**
290      * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
291      * @param filename String; the file name to read the data from
292      * @param metaFilename String; the file name to read the metadata from
293      * @return dataTable the data table reconstructed from the meta data and filled with the data
294      * @throws IOException when the CSV data was not formatted right
295      * @throws TextSerializationException on unknown data type for serialization
296      */
297     public static DataTable readData(final String filename, final String metaFilename)
298             throws IOException, TextSerializationException
299     {
300         FileReader fr = null;
301         FileReader mfr = null;
302         try
303         {
304             fr = new FileReader(filename);
305             mfr = new FileReader(metaFilename);
306             return readData(fr, mfr);
307         }
308         finally
309         {
310             if (null != fr)
311             {
312                 fr.close(); // May have already been closed when the CSV reader was closed, but multiple close is harmless
313             }
314             if (null != mfr)
315             {
316                 mfr.close();
317             }
318         }
319     }
320 
321 }