1 package org.djutils.data.csv;
2
3 import java.io.FileReader;
4 import java.io.FileWriter;
5 import java.io.IOException;
6 import java.io.Reader;
7 import java.io.Writer;
8 import java.util.ArrayList;
9 import java.util.Iterator;
10 import java.util.LinkedHashMap;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.Set;
14
15 import org.djutils.data.DataColumn;
16 import org.djutils.data.DataRecord;
17 import org.djutils.data.DataTable;
18 import org.djutils.data.ListDataTable;
19 import org.djutils.data.SimpleDataColumn;
20 import org.djutils.data.serialization.TextSerializationException;
21 import org.djutils.data.serialization.TextSerializer;
22 import org.djutils.exceptions.Throw;
23 import org.djutils.primitives.Primitive;
24
25 import de.siegmar.fastcsv.reader.NamedCsvReader;
26 import de.siegmar.fastcsv.reader.NamedCsvRow;
27 import de.siegmar.fastcsv.writer.CsvWriter;
28 import de.siegmar.fastcsv.writer.LineDelimiter;
29
30 /**
31 * CSVData takes care of reading and writing of table data in CSV format. The class can be used, e.g., as follows:
32 *
33 * <pre>
34 * DataTable dataTable = new ListDataTable("data", "dataTable", columns);
35 * Writer writer = new FileWriter("c:/data/data.csv");
36 * Writer metaWriter = new FileWriter("c:/data/data.meta.csv");
37 * CSVData.writeData(writer, metaWriter, dataTable);
38 * </pre>
39 *
40 * Copyright (c) 2020-2022 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
41 * for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
42 * distributed under a three-clause BSD-style license, which can be found at
43 * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>. <br>
44 * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
45 * @author <a href="https://www.tudelft.nl/pknoppers">Peter Knoppers</a>
46 * @author <a href="http://www.transport.citg.tudelft.nl">Wouter Schakel</a>
47 */
48 public final class CSVData
49 {
50 /**
51 * Utility class, no public constructor.
52 */
53 private CSVData()
54 {
55 // utility class
56 }
57
58 /**
59 * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
60 * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
61 * header contains the id, description, and class of the data table itself. The second and further rows contain information
62 * about the columns of the data table.
63 * @param writer Writer; the writer that writes the data, e.g. to a file
64 * @param metaWriter Writer; the writer for the metadata
65 * @param dataTable DataTable; the data table to write
66 * @param separator char; the delimiter to use for separating entries
67 * @param quotechar char; the character to use for quoted elements
68 * @param lineDelimiter String; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
69 * @throws IOException on I/O error when writing the data
70 * @throws TextSerializationException on unknown data type for serialization
71 */
72 public static void writeData(final Writer writer, final Writer metaWriter, final DataTable dataTable, final char separator,
73 final char quotechar, final LineDelimiter lineDelimiter) throws IOException, TextSerializationException
74 {
75 // Write the metadata file
76 try (CsvWriter csvMetaWriter = CsvWriter.builder().fieldSeparator(separator).quoteCharacter(quotechar)
77 .lineDelimiter(lineDelimiter).build(metaWriter))
78 {
79 csvMetaWriter.writeRow("id", "description", "className");
80 csvMetaWriter.writeRow(dataTable.getId(), dataTable.getDescription(), dataTable.getClass().getName());
81 for (DataColumn<?> column : dataTable.getColumns())
82 {
83 csvMetaWriter.writeRow(column.getId(), column.getDescription(), column.getValueType().getName());
84 }
85
86 // Assemble the serializer array
87 TextSerializer<?>[] serializers = new TextSerializer[dataTable.getNumberOfColumns()];
88 for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
89 {
90 DataColumn<?> column = dataTable.getColumns().get(i);
91 serializers[i] = TextSerializer.resolve(column.getValueType());
92 }
93
94 // Write the data file
95 try (CsvWriter csvWriter = CsvWriter.builder().fieldSeparator(separator).quoteCharacter(quotechar)
96 .lineDelimiter(lineDelimiter).build(writer))
97 {
98 csvWriter.writeRow(dataTable.getColumnIds());
99 String[] textFields = new String[dataTable.getNumberOfColumns()];
100 for (DataRecord record : dataTable)
101 {
102 Object[] values = record.getValues();
103 for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
104 {
105 textFields[i] = serializers[i].serialize(values[i]);
106 }
107 csvWriter.writeRow(textFields);
108 }
109 }
110 }
111 }
112
113 /**
114 * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the metadata.
115 * The metadata consists of a CSV file with three columns: the id, the description, and the class. The first row after the
116 * header contains the id, description, and class of the data table itself. The second and further rows contain information
117 * about the columns of the data table. The line ending used will be CRLF which is RFC 4180 compliant.
118 * @param writer Writer; the writer that writes the data, e.g. to a file
119 * @param metaWriter Writer; the writer for the metadata
120 * @param dataTable DataTable; the data table to write
121 * @throws IOException on I/O error when writing the data
122 * @throws TextSerializationException on unknown data type for serialization
123 */
124 public static void writeData(final Writer writer, final Writer metaWriter, final DataTable dataTable)
125 throws IOException, TextSerializationException
126 {
127 writeData(writer, metaWriter, dataTable, ',', '"', LineDelimiter.CRLF);
128 }
129
130 /**
131 * Write the data from the data table in CSV format.
132 * @param filename String; the file name to write the data to
133 * @param metaFilename String; the file name to write the metadata to
134 * @param dataTable DataTable; the data table to write
135 * @throws IOException on I/O error when writing the data
136 * @throws TextSerializationException on unknown data type for serialization
137 */
138 public static void writeData(final String filename, final String metaFilename, final DataTable dataTable)
139 throws IOException, TextSerializationException
140 {
141 FileWriter fw = null;
142 FileWriter mfw = null;
143 try
144 {
145 fw = new FileWriter(filename);
146 mfw = new FileWriter(metaFilename);
147 writeData(fw, mfw, dataTable);
148 }
149 finally
150 {
151 if (null != fw)
152 {
153 fw.close(); // May have already been closed when the CSV writer was closed, but multiple close is harmless
154 }
155 if (null != mfw)
156 {
157 mfw.close();
158 }
159 }
160 }
161
162 /**
163 * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
164 * @param reader Reader; the reader that can read the data, e.g. from a file
165 * @param metaReader Reader; the writer for the metadata
166 * @return dataTable the data table reconstructed from the meta data and filled with the data
167 * @param separator char; the delimiter to use for separating entries
168 * @param quotechar char; the character to use for quoted elements
169 * @throws IOException when the CSV data was not formatted right
170 * @throws TextSerializationException on unknown data type for serialization
171 */
172 public static DataTable readData(final Reader reader, final Reader metaReader, final char separator, final char quotechar)
173 throws IOException, TextSerializationException
174 {
175 // Read the metadata file and reconstruct the data table
176 List<DataColumn<?>> columns = new ArrayList<>();
177 try (NamedCsvReader csvMetaReader =
178 NamedCsvReader.builder().fieldSeparator(separator).quoteCharacter(quotechar).build(metaReader))
179 {
180 Set<String> metaHeader = csvMetaReader.getHeader();
181 Throw.when(
182 metaHeader.size() != 3 || !metaHeader.contains("id") || !metaHeader.contains("description")
183 || !metaHeader.contains("className"),
184 IOException.class,
185 "header of the metafile does not contain 'id, description, className' as fields, but %s: ", metaHeader);
186
187 // table metadata
188 Map<String, String> tableRow = new LinkedHashMap<>();
189 Iterator<NamedCsvRow> it = csvMetaReader.iterator();
190 while (it.hasNext())
191 {
192 NamedCsvRow row = it.next();
193 // table metadata
194 if (tableRow.size() == 0)
195 {
196 tableRow.putAll(row.getFields());
197 if (!tableRow.get("className").endsWith("ListDataTable"))
198 {
199 throw new IOException("Currently, this method can only recreate a ListDataTable");
200 }
201 }
202 else
203 {
204 // column metadata
205 String type = row.getField("className");
206 Class<?> valueClass = Primitive.forName(type);
207 if (valueClass == null)
208 {
209 try
210 {
211 valueClass = Class.forName(type);
212 }
213 catch (ClassNotFoundException exception)
214 {
215 throw new IOException("Could not find class " + type, exception);
216 }
217 }
218 @SuppressWarnings({"rawtypes", "unchecked"})
219 DataColumn<?> column = new SimpleDataColumn(row.getField("id"), row.getField("description"), valueClass);
220 columns.add(column);
221 }
222 }
223
224 Throw.when(tableRow == null, IOException.class, "no table information in the metafile");
225
226 // create DataTable
227 ListDataTable dataTable = new ListDataTable(tableRow.get("id"), tableRow.get("description"), columns);
228
229 // Assemble the serializer array
230 TextSerializer<?>[] serializers = new TextSerializer[dataTable.getNumberOfColumns()];
231 for (int i = 0; i < dataTable.getNumberOfColumns(); i++)
232 {
233 DataColumn<?> column = dataTable.getColumns().get(i);
234 serializers[i] = TextSerializer.resolve(column.getValueType());
235 }
236
237 // Read the data file
238 try (NamedCsvReader csvReader =
239 NamedCsvReader.builder().fieldSeparator(separator).quoteCharacter(quotechar).build(reader))
240 {
241 Set<String> header = csvReader.getHeader();
242 Throw.when(header.size() != columns.size(), IOException.class,
243 "Number of columns in the data file does not match column metadata size");
244 for (int i = 0; i < columns.size(); i++)
245 {
246 Throw.when(!header.contains(columns.get(i).getId()), IOException.class,
247 "Header with id %s not found in the data file", columns.get(i).getId());
248 }
249
250 // Read the data file records
251 csvReader.forEach(row ->
252 {
253 Object[] values = new Object[columns.size()];
254 for (int i = 0; i < columns.size(); i++)
255 {
256 values[i] = serializers[i].deserialize(row.getField(columns.get(i).getId()));
257 }
258 dataTable.addRecord(values);
259 });
260 return dataTable;
261 }
262 }
263 }
264
265 /**
266 * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
267 * @param reader Reader; the reader that can read the data, e.g. from a file
268 * @param metaReader Reader; the writer for the metadata
269 * @return dataTable the data table reconstructed from the meta data and filled with the data
270 * @throws IOException when the CSV data was not formatted right
271 * @throws TextSerializationException on unknown data type for serialization
272 */
273 public static DataTable readData(final Reader reader, final Reader metaReader)
274 throws IOException, TextSerializationException
275 {
276 return readData(reader, metaReader, ',', '"');
277 }
278
279 /**
280 * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table.
281 * @param filename String; the file name to read the data from
282 * @param metaFilename String; the file name to read the metadata from
283 * @return dataTable the data table reconstructed from the meta data and filled with the data
284 * @throws IOException when the CSV data was not formatted right
285 * @throws TextSerializationException on unknown data type for serialization
286 */
287 public static DataTable readData(final String filename, final String metaFilename)
288 throws IOException, TextSerializationException
289 {
290 FileReader fr = null;
291 FileReader mfr = null;
292 try
293 {
294 fr = new FileReader(filename);
295 mfr = new FileReader(metaFilename);
296 return readData(fr, mfr);
297 }
298 finally
299 {
300 if (null != fr)
301 {
302 fr.close(); // May have already been closed when the CSV reader was closed, but multiple close is harmless
303 }
304 if (null != mfr)
305 {
306 mfr.close();
307 }
308 }
309 }
310
311 }