From f335478f10a9731adb209c51118a6a15691d43d6 Mon Sep 17 00:00:00 2001 From: Bart Hanssens Date: Thu, 11 Jul 2024 15:43:25 +0100 Subject: [PATCH] GH-5058: additional parser code (WIP) --- .../eclipse/rdf4j/rio/csvw/CSVWParser.java | 45 +++++++------------ .../rio/csvw/parsers/CellParserDate.java | 1 + .../rio/csvw/parsers/CellParserString.java | 4 -- .../src/test/resources/painters-metadata.json | 2 +- 4 files changed, 19 insertions(+), 33 deletions(-) diff --git a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java index 3ffe653a14..3eafd1e952 100644 --- a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java +++ b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java @@ -55,7 +55,9 @@ import com.opencsv.exceptions.CsvValidationException; /** - * Basic (experimental) CSV on the Web Parser + * Basic (experimental) CSV on the Web parser. + * + * Currently only "minimal mode" is supported * * @author Bart Hanssens * @see CSV on the Web Primer @@ -107,9 +109,8 @@ public synchronized void parse(InputStream in, String baseURI) @Override public void parse(Reader reader, String baseURI) throws IOException, RDFParseException, RDFHandlerException { - Model metadata = parseMetadata(null, reader, baseURI); - clear(); + Model metadata = parseMetadata(null, reader, baseURI); } /** @@ -217,9 +218,6 @@ private CellParser getCellParser(Model metadata, Resource column) { CellParser parser = CellParserFactory.create(datatype); - Models.getPropertyString(metadata, column, CSVW.LANG).ifPresent(v -> parser.setLang(v)); - getFormat(metadata, column).ifPresent(v -> parser.setFormat(v.stringValue())); - Models.getPropertyString(metadata, column, CSVW.NAME) .ifPresentOrElse(v -> parser.setName(v), () -> new RDFParseException("Metadata file does not contain name for column " + column)); @@ -227,6 +225,17 @@ private CellParser getCellParser(Model metadata, Resource column) { Models.getPropertyString(metadata, column, CSVW.DEFAULT).ifPresent(v -> parser.setDefaultValue(v)); Models.getPropertyString(metadata, column, CSVW.REQUIRED) .ifPresent(v -> parser.setIsRequired(Boolean.parseBoolean(v))); + + // only useful for strings + Models.getPropertyString(metadata, column, CSVW.LANG).ifPresent(v -> parser.setLang(v)); + + // only useful for numeric + Models.getPropertyString(metadata, column, CSVW.DECIMAL_CHAR).ifPresent(v -> parser.setDecimalChar(v)); + Models.getPropertyString(metadata, column, CSVW.GROUP_CHAR).ifPresent(v -> parser.setGroupChar(v)); + + // mostly for date formats + Models.getPropertyString(metadata, column, CSVW.FORMAT).ifPresent(v -> parser.setFormat(v)); + Models.getPropertyString(metadata, column, CSVW.VALUE_URL).ifPresent(v -> parser.setValueURL(v)); // use a property from a vocabulary as predicate, or create a property relative to the namespace of the CSV @@ -238,7 +247,7 @@ private CellParser getCellParser(Model metadata, Resource column) { } /** - * Get IRI of base or derived datatype + * Get name of base or derived datatype * * @param metadata * @param column @@ -263,26 +272,6 @@ private IRI getDatatypeIRI(Model metadata, Resource column) { return XSD.valueOf(datatype.stringValue().toUpperCase()).getIri(); } - /** - * Get name of the generic datatype or more specific datatype - * - * @param metadata - * @param column - * @return - */ - private Optional getFormat(Model metadata, Resource column) { - Optional val = Models.getProperty(metadata, column, CSVW.DATATYPE); - if (val.isPresent()) { - Value datatype = val.get(); - // derived datatype - if (datatype.isBNode()) { - Optional fmt = Models.getProperty(metadata, (Resource) datatype, CSVW.FORMAT); - val = Models.getProperty(metadata, (Resource) fmt.get(), CSVW.BASE); - } - } - return val; - } - /** * Get "about" URL template, to be used to create the subject of the triples * @@ -376,7 +365,7 @@ private CSVReader getCSVReader(Model metadata, Resource table, Reader reader) { Models.getPropertyString(metadata, (Resource) dialect.get(), CSVW.DELIMITER) .ifPresent(v -> parserBuilder.withSeparator(v.charAt(0))); Models.getPropertyString(metadata, (Resource) dialect.get(), CSVW.HEADER) - .ifPresent(v -> builder.withSkipLines(v.equalsIgnoreCase("false") ? 1 : 0)); + .ifPresent(v -> builder.withSkipLines(v.equalsIgnoreCase("false") ? 0 : 1)); Models.getPropertyString(metadata, (Resource) dialect.get(), CSVW.QUOTE_CHAR) .ifPresent(v -> parserBuilder.withQuoteChar(v.charAt(0))); } diff --git a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserDate.java b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserDate.java index 1b25e8e097..bafee06f24 100644 --- a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserDate.java +++ b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserDate.java @@ -28,6 +28,7 @@ public class CellParserDate extends CellParser { @Override public void setFormat(String format) { super.setFormat(format); + System.err.println(format); formatter = DateTimeFormatter.ofPattern(format); } diff --git a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserString.java b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserString.java index c079156df3..ccd8e21b1a 100644 --- a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserString.java +++ b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserString.java @@ -10,10 +10,6 @@ *******************************************************************************/ package org.eclipse.rdf4j.rio.csvw.parsers; -import java.util.Set; - -import org.eclipse.rdf4j.model.IRI; -import org.eclipse.rdf4j.model.Namespace; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.model.util.Values; diff --git a/core/rio/csvw/src/test/resources/painters-metadata.json b/core/rio/csvw/src/test/resources/painters-metadata.json index 8df163f8d5..a31781f74b 100644 --- a/core/rio/csvw/src/test/resources/painters-metadata.json +++ b/core/rio/csvw/src/test/resources/painters-metadata.json @@ -21,7 +21,7 @@ "base": "date", "format": "d/M/yyyy" } }, - { "name": "maried", + { "name": "married", "datatype": { "base": "boolean", "format": "Yes|No"