CsvData.java
package org.djutils.data.csv;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.djutils.data.Column;
import org.djutils.data.ListTable;
import org.djutils.data.Row;
import org.djutils.data.Table;
import org.djutils.data.serialization.TextSerializationException;
import org.djutils.data.serialization.TextSerializer;
import org.djutils.exceptions.Throw;
import org.djutils.io.CompressedFileWriter;
import org.djutils.primitives.Primitive;
import de.siegmar.fastcsv.reader.NamedCsvReader;
import de.siegmar.fastcsv.reader.NamedCsvRow;
import de.siegmar.fastcsv.writer.CsvWriter;
import de.siegmar.fastcsv.writer.LineDelimiter;
/**
 * CsvData takes care of reading and writing of table data in CSV format. The class can be used, e.g., as follows:
 *
 * <pre>
 * Table table = new ListTable("data", "Table", columns);
 * Writer writer = new FileWriter("c:/data/data.csv");
 * Writer metaWriter = new FileWriter("c:/data/data.meta.csv");
 * CsvData.writeData(writer, metaWriter, table);
 * </pre>
 * <p>
 * Copyright (c) 2020-2024 Delft University of Technology, PO Box 5, 2600 AA, Delft, the Netherlands. All rights reserved. <br>
 * BSD-style license. See <a href="https://djutils.org/docs/current/djutils/licenses.html">DJUTILS License</a>.
 * </p>
 * @author <a href="https://github.com/averbraeck">Alexander Verbraeck</a>
 * @author <a href="https://tudelft.nl/staff/p.knoppers-1">Peter Knoppers</a>
 * @author <a href="https://dittlab.tudelft.nl">Wouter Schakel</a>
 */
public final class CsvData
{
    /** Field separator used by the overloads that do not take an explicit separator. */
    private static final char DEFAULT_SEPARATOR = ',';

    /** Quote character used by the overloads that do not take an explicit quote character. */
    private static final char DEFAULT_QUOTE_CHAR = '"';

    /** Line delimiter used by the overloads that do not take an explicit line delimiter. */
    private static final LineDelimiter DEFAULT_LINE_DELIMITER = LineDelimiter.CRLF;

    /**
     * Utility class, no public constructor.
     */
    private CsvData()
    {
        // utility class
    }

    /**
     * Write the data from the data table in CSV format. The writer writes the data, whereas the metaWriter writes the
     * metadata. The metadata consists of a CSV file with four columns: the id, the description, the class name, and the
     * unit. The first row after the header contains the id, description, and class of the data table itself (with an empty
     * unit). The second and further rows contain information about the columns of the data table. The metadata is written
     * first, then the data; both writers are closed by this method.
     * @param writer Writer; the writer that writes the data, e.g. to a file
     * @param metaWriter Writer; the writer for the metadata
     * @param table Table; the data table to write
     * @param separator char; the delimiter to use for separating entries
     * @param quotechar char; the character to use for quoted elements
     * @param lineDelimiter LineDelimiter; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
     * @throws IOException on I/O error when writing the data
     * @throws TextSerializationException on unknown data type for serialization
     */
    public static void writeData(final Writer writer, final Writer metaWriter, final Table table, final char separator,
            final char quotechar, final LineDelimiter lineDelimiter) throws IOException, TextSerializationException
    {
        writeMeta(metaWriter, true, table, separator, quotechar, lineDelimiter);
        writeData(writer, true, table, separator, quotechar, lineDelimiter);
    }

    /**
     * Write the data from the data table in CSV format. The data file and meta data file are zipped. The metadata consists
     * of a CSV file with four columns: the id, the description, the class name, and the unit. The first row after the header
     * contains the id, description, and class of the data table itself (with an empty unit). The second and further rows
     * contain information about the columns of the data table. The metadata entry is written first, then the data entry.
     * @param writer CompressedFileWriter; the writer that provides the entries of the zip file
     * @param csvName String; name of the CSV file within the zip file
     * @param metaName String; name of the meta data file within the zip file
     * @param table Table; the data table to write
     * @param separator char; the delimiter to use for separating entries
     * @param quotechar char; the character to use for quoted elements
     * @param lineDelimiter LineDelimiter; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
     * @throws IOException on I/O error when writing the data
     * @throws TextSerializationException on unknown data type for serialization
     */
    public static void writeZippedData(final CompressedFileWriter writer, final String csvName, final String metaName,
            final Table table, final char separator, final char quotechar, final LineDelimiter lineDelimiter)
            throws IOException, TextSerializationException
    {
        // The meta entry is deliberately left open; only the last entry written closes the writer.
        // NOTE(review): presumably closing after the meta entry would close the whole zip stream - confirm against
        // CompressedFileWriter.
        writeMeta(writer.next(metaName), false, table, separator, quotechar, lineDelimiter);
        writeData(writer.next(csvName), true, table, separator, quotechar, lineDelimiter);
    }

    /**
     * Write the data from the data table in CSV format, using a comma as separator, a double quote as quote character, and
     * CRLF as line ending. The data file and meta data file are zipped. The metadata consists of a CSV file with four
     * columns: the id, the description, the class name, and the unit. The first row after the header contains the id,
     * description, and class of the data table itself. The second and further rows contain information about the columns of
     * the data table.
     * @param writer CompressedFileWriter; the writer that provides the entries of the zip file
     * @param csvName String; name of the CSV file within the zip file
     * @param metaName String; name of the meta data file within the zip file
     * @param table Table; the data table to write
     * @throws IOException on I/O error when writing the data
     * @throws TextSerializationException on unknown data type for serialization
     */
    public static void writeZippedData(final CompressedFileWriter writer, final String csvName, final String metaName,
            final Table table) throws IOException, TextSerializationException
    {
        writeZippedData(writer, csvName, metaName, table, DEFAULT_SEPARATOR, DEFAULT_QUOTE_CHAR, DEFAULT_LINE_DELIMITER);
    }

    /**
     * Writes the meta data: a header row, one row describing the table, and one row per column.
     * @param metaWriter Writer; the writer for the metadata
     * @param closeWriter boolean; whether to close the stream when done (also when writing failed)
     * @param table Table; the data table to write
     * @param separator char; the delimiter to use for separating entries
     * @param quotechar char; the character to use for quoted elements
     * @param lineDelimiter LineDelimiter; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
     * @throws IOException on I/O error when writing the data
     */
    private static void writeMeta(final Writer metaWriter, final boolean closeWriter, final Table table,
            final char separator, final char quotechar, final LineDelimiter lineDelimiter) throws IOException
    {
        CsvWriter csvMetaWriter = null;
        try
        {
            csvMetaWriter = CsvWriter.builder().fieldSeparator(separator).quoteCharacter(quotechar)
                    .lineDelimiter(lineDelimiter).build(metaWriter);
            csvMetaWriter.writeRow("id", "description", "className", "unit");
            // the table itself has no unit
            csvMetaWriter.writeRow(table.getId(), table.getDescription(), table.getClass().getName(), "");
            for (Column<?> column : table.getColumns())
            {
                // a column without unit is stored with an empty unit field
                String unit = column.getUnit() == null ? "" : column.getUnit();
                csvMetaWriter.writeRow(column.getId(), column.getDescription(), column.getValueType().getName(), unit);
            }
        }
        finally
        {
            if (closeWriter && csvMetaWriter != null)
            {
                csvMetaWriter.close();
            }
        }
    }

    /**
     * Writes the data: a header row with the column ids, followed by one row per table row with the serialized values.
     * @param writer Writer; the writer that writes the data, e.g. to a file
     * @param closeWriter boolean; whether to close the stream when done (also when writing failed)
     * @param table Table; the data table to write
     * @param separator char; the delimiter to use for separating entries
     * @param quotechar char; the character to use for quoted elements
     * @param lineDelimiter LineDelimiter; the line terminator to use, can be LineDelimiter.CR, LF, CRLF or PLATFORM
     * @throws IOException on I/O error when writing the data
     * @throws TextSerializationException on unknown data type for serialization
     */
    private static void writeData(final Writer writer, final boolean closeWriter, final Table table, final char separator,
            final char quotechar, final LineDelimiter lineDelimiter) throws IOException, TextSerializationException
    {
        // Resolve the serializer and the unit of every column once, before any row is written
        final int numberOfColumns = table.getNumberOfColumns();
        TextSerializer<?>[] serializers = new TextSerializer[numberOfColumns];
        String[] units = new String[numberOfColumns];
        for (int i = 0; i < numberOfColumns; i++)
        {
            Column<?> column = table.getColumn(i);
            serializers[i] = TextSerializer.resolve(column.getValueType());
            units[i] = column.getUnit();
        }

        // Write the data file
        CsvWriter csvWriter = null;
        try
        {
            csvWriter = CsvWriter.builder().fieldSeparator(separator).quoteCharacter(quotechar)
                    .lineDelimiter(lineDelimiter).build(writer);
            csvWriter.writeRow(table.getColumnIds());
            // the same array is reused for every row; each row is handed to the CSV writer before the next overwrite
            String[] textFields = new String[numberOfColumns];
            for (Row row : table)
            {
                Object[] values = row.getValues();
                for (int i = 0; i < numberOfColumns; i++)
                {
                    textFields[i] = TextSerializer.serialize(serializers[i], values[i], units[i]);
                }
                csvWriter.writeRow(textFields);
            }
        }
        finally
        {
            if (closeWriter && csvWriter != null)
            {
                csvWriter.close();
            }
        }
    }

    /**
     * Write the data from the data table in CSV format, using a comma as separator and a double quote as quote character.
     * The writer writes the data, whereas the metaWriter writes the metadata. The metadata consists of a CSV file with four
     * columns: the id, the description, the class name, and the unit. The first row after the header contains the id,
     * description, and class of the data table itself. The second and further rows contain information about the columns of
     * the data table. The line ending used will be CRLF which is RFC 4180 compliant.
     * @param writer Writer; the writer that writes the data, e.g. to a file
     * @param metaWriter Writer; the writer for the metadata
     * @param table Table; the data table to write
     * @throws IOException on I/O error when writing the data
     * @throws TextSerializationException on unknown data type for serialization
     */
    public static void writeData(final Writer writer, final Writer metaWriter, final Table table)
            throws IOException, TextSerializationException
    {
        writeData(writer, metaWriter, table, DEFAULT_SEPARATOR, DEFAULT_QUOTE_CHAR, DEFAULT_LINE_DELIMITER);
    }

    /**
     * Write the data from the data table in CSV format to two files. The files are opened with a plain FileWriter, so the
     * default character encoding of FileWriter applies.
     * @param filename String; the file name to write the data to
     * @param metaFilename String; the file name to write the metadata to
     * @param table Table; the data table to write
     * @throws IOException on I/O error when writing the data
     * @throws TextSerializationException on unknown data type for serialization
     */
    public static void writeData(final String filename, final String metaFilename, final Table table)
            throws IOException, TextSerializationException
    {
        try (FileWriter dataFileWriter = new FileWriter(filename); FileWriter metaFileWriter = new FileWriter(metaFilename))
        {
            writeData(dataFileWriter, metaFileWriter, table);
        }
    }

    /**
     * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table. The first row of
     * the metadata (after the header) describes the table; every following row describes one column. Both readers are
     * closed once the metadata has been parsed successfully and reading the data has finished or failed.
     * @param reader Reader; the reader that can read the data, e.g. from a file
     * @param metaReader Reader; the reader for the metadata
     * @param separator char; the delimiter to use for separating entries
     * @param quotechar char; the character to use for quoted elements
     * @return Table the data table reconstructed from the meta data and filled with the data
     * @throws IOException when the CSV data was not formatted right, when a column class cannot be found, or when the
     *             metadata contains no table information
     * @throws TextSerializationException on unknown data type for serialization
     */
    public static Table readData(final Reader reader, final Reader metaReader, final char separator, final char quotechar)
            throws IOException, TextSerializationException
    {
        // Read the metadata file and reconstruct the data table
        try (NamedCsvReader csvMetaReader =
                NamedCsvReader.builder().fieldSeparator(separator).quoteCharacter(quotechar).build(metaReader))
        {
            Set<String> metaHeader = csvMetaReader.getHeader();
            Throw.when(
                    metaHeader.size() != 4 || !metaHeader.contains("id") || !metaHeader.contains("description")
                            || !metaHeader.contains("className") || !metaHeader.contains("unit"),
                    IOException.class,
                    "header of the metafile does not contain 'id, description, className, unit' as fields, but %s: ",
                    metaHeader);

            List<Column<?>> columns = new ArrayList<>();
            Map<String, String> tableRow = new LinkedHashMap<>();
            for (NamedCsvRow row : csvMetaReader)
            {
                if (tableRow.isEmpty())
                {
                    // table metadata: the first row after the header
                    tableRow.putAll(row.getFields());
                }
                else
                {
                    // column metadata: every further row
                    Class<?> valueClass = resolveValueClass(row.getField("className"));
                    Column<?> column =
                            new Column<>(row.getField("id"), row.getField("description"), valueClass, row.getField("unit"));
                    columns.add(column);
                }
            }
            // The map is never null; it stays empty when the metafile has a header but no table row.
            Throw.when(tableRow.isEmpty(), IOException.class, "no table information in the metafile");

            // Create the table. A ListTable is created for every className found in the metadata: it is the only table
            // type that is instantiated here, and it also serves as the fallback for any other class name.
            ListTable listTable = new ListTable(tableRow.get("id"), tableRow.get("description"), columns);
            Consumer<Object[]> rowAdder = (data) -> listTable.addRow(data);

            // Assemble the serializer array
            TextSerializer<?>[] serializers = new TextSerializer[columns.size()];
            for (int i = 0; i < serializers.length; i++)
            {
                serializers[i] = TextSerializer.resolve(columns.get(i).getValueType());
            }

            // Read the data file
            try (NamedCsvReader csvReader =
                    NamedCsvReader.builder().fieldSeparator(separator).quoteCharacter(quotechar).build(reader))
            {
                Set<String> header = csvReader.getHeader();
                Throw.when(header.size() != columns.size(), IOException.class,
                        "Number of columns in the data file does not match column metadata size");
                for (Column<?> column : columns)
                {
                    Throw.when(!header.contains(column.getId()), IOException.class,
                            "Header with id %s not found in the data file", column.getId());
                }

                // Read the data file records; fields are looked up by column id, so the column order of the data file
                // does not have to match the order of the metadata
                csvReader.forEach(row ->
                {
                    Object[] values = new Object[columns.size()];
                    for (int i = 0; i < values.length; i++)
                    {
                        Column<?> column = columns.get(i);
                        values[i] = TextSerializer.deserialize(serializers[i], row.getField(column.getId()), column);
                    }
                    rowAdder.accept(values); // addRow
                });
                return listTable;
            }
        }
    }

    /**
     * Resolve the value class of a column from the class name stored in the metadata. Primitive.forName is tried first;
     * when that yields null, the name is loaded as a regular class.
     * @param className String; the class name as stored in the metadata
     * @return Class&lt;?&gt;; the class for the given name
     * @throws IOException when no class with the given name can be found
     */
    private static Class<?> resolveValueClass(final String className) throws IOException
    {
        Class<?> primitiveClass = Primitive.forName(className);
        if (primitiveClass != null)
        {
            return primitiveClass;
        }
        try
        {
            return Class.forName(className);
        }
        catch (ClassNotFoundException exception)
        {
            throw new IOException("Could not find class " + className, exception);
        }
    }

    /**
     * Read the data from the CSV-file into the data table, using a comma as separator and a double quote as quote
     * character. Use the metadata to reconstruct the data table.
     * @param reader Reader; the reader that can read the data, e.g. from a file
     * @param metaReader Reader; the reader for the metadata
     * @return Table the data table reconstructed from the meta data and filled with the data
     * @throws IOException when the CSV data was not formatted right
     * @throws TextSerializationException on unknown data type for serialization
     */
    public static Table readData(final Reader reader, final Reader metaReader) throws IOException, TextSerializationException
    {
        return readData(reader, metaReader, DEFAULT_SEPARATOR, DEFAULT_QUOTE_CHAR);
    }

    /**
     * Read the data from the CSV-file into the data table. Use the metadata to reconstruct the data table. The files are
     * opened with a plain FileReader, so the default character encoding of FileReader applies.
     * @param filename String; the file name to read the data from
     * @param metaFilename String; the file name to read the metadata from
     * @return Table the data table reconstructed from the meta data and filled with the data
     * @throws IOException when the CSV data was not formatted right
     * @throws TextSerializationException on unknown data type for serialization
     */
    public static Table readData(final String filename, final String metaFilename)
            throws IOException, TextSerializationException
    {
        try (FileReader dataFileReader = new FileReader(filename); FileReader metaFileReader = new FileReader(metaFilename))
        {
            return readData(dataFileReader, metaFileReader);
        }
    }

    /**
     * Read the data from a CSV-file inside a zip file, using a comma as separator and a double quote as quote character.
     * The metadata file should be in the same zipfile. Use the metadata to reconstruct the data table.
     * @param fileName String; file name of the zip file
     * @param csvName String; name of the CSV-file, without path
     * @param metaName String; name of the metadata file, without path
     * @return Table the data table reconstructed from the meta data and filled with the data
     * @throws IOException when the CSV data was not formatted right
     * @throws TextSerializationException on unknown data type for serialization
     */
    public static Table readZippedData(final String fileName, final String csvName, final String metaName)
            throws IOException, TextSerializationException
    {
        return readZippedData(fileName, csvName, metaName, DEFAULT_SEPARATOR, DEFAULT_QUOTE_CHAR);
    }

    /**
     * Read the data from a CSV-file inside a zip file. The metadata file should be in the same zipfile. Use the metadata to
     * reconstruct the data table. Entries are matched on their exact name; a missing entry is reported through
     * Throw.whenNull.
     * @param fileName String; file name of the zip file
     * @param csvName String; name of the CSV-file, without path
     * @param metaName String; name of the metadata file, without path
     * @param separator char; the delimiter to use for separating entries
     * @param quotechar char; the character to use for quoted elements
     * @return Table the data table reconstructed from the meta data and filled with the data
     * @throws IOException when the CSV data was not formatted right
     * @throws TextSerializationException on unknown data type for serialization
     */
    public static Table readZippedData(final String fileName, final String csvName, final String metaName,
            final char separator, final char quotechar) throws IOException, TextSerializationException
    {
        try (ZipFile zipFile = new ZipFile(fileName))
        {
            Reader reader = null;
            Reader metaReader = null;
            Iterator<? extends ZipEntry> entries = zipFile.entries().asIterator();
            while (entries.hasNext())
            {
                ZipEntry zipEntry = entries.next();
                String entryName = zipEntry.getName();
                // the data name takes precedence when csvName and metaName are equal
                if (entryName.equals(csvName))
                {
                    reader = new InputStreamReader(zipFile.getInputStream(zipEntry));
                }
                else if (entryName.equals(metaName))
                {
                    metaReader = new InputStreamReader(zipFile.getInputStream(zipEntry));
                }
            }
            Throw.whenNull(reader, "File %s not found in %s.", csvName, fileName);
            Throw.whenNull(metaReader, "File %s not found in %s.", metaName, fileName);
            return readData(reader, metaReader, separator, quotechar);
        }
    }
}