Skip to content

Commit

Permalink
eclipse-rdf4jGH-5058: additional parser code (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
barthanssens committed Jul 11, 2024
1 parent 43aa09b commit f335478
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@
import com.opencsv.exceptions.CsvValidationException;

/**
* Basic (experimental) CSV on the Web Parser
* Basic (experimental) CSV on the Web parser.
*
* Currently only "minimal mode" is supported.
*
* @author Bart Hanssens
* @see <a href="https://w3c.github.io/csvw/primer/">CSV on the Web Primer</a>
Expand Down Expand Up @@ -107,9 +109,8 @@ public synchronized void parse(InputStream in, String baseURI)
/**
 * Parses CSVW input supplied as a character stream.
 *
 * Any state left over from a previous run is cleared first, then the metadata is parsed from the reader.
 * Work in progress: the resulting metadata model is not used any further by this method yet.
 *
 * @param reader  character stream to read from
 * @param baseURI base URI passed on to the metadata parser
 * @throws IOException
 * @throws RDFParseException
 * @throws RDFHandlerException
 */
@Override
public void parse(Reader reader, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException {
	// reset first, so the metadata is only parsed once and after the parser state has been cleared
	clear();
	Model metadata = parseMetadata(null, reader, baseURI);
}

/**
Expand Down Expand Up @@ -217,16 +218,24 @@ private CellParser getCellParser(Model metadata, Resource column) {

CellParser parser = CellParserFactory.create(datatype);

Models.getPropertyString(metadata, column, CSVW.LANG).ifPresent(v -> parser.setLang(v));
getFormat(metadata, column).ifPresent(v -> parser.setFormat(v.stringValue()));

Models.getPropertyString(metadata, column, CSVW.NAME)
.ifPresentOrElse(v -> parser.setName(v),
() -> new RDFParseException("Metadata file does not contain name for column " + column));

Models.getPropertyString(metadata, column, CSVW.DEFAULT).ifPresent(v -> parser.setDefaultValue(v));
Models.getPropertyString(metadata, column, CSVW.REQUIRED)
.ifPresent(v -> parser.setIsRequired(Boolean.parseBoolean(v)));

// only useful for strings
Models.getPropertyString(metadata, column, CSVW.LANG).ifPresent(v -> parser.setLang(v));

// only useful for numeric
Models.getPropertyString(metadata, column, CSVW.DECIMAL_CHAR).ifPresent(v -> parser.setDecimalChar(v));
Models.getPropertyString(metadata, column, CSVW.GROUP_CHAR).ifPresent(v -> parser.setGroupChar(v));

// mostly for date formats
Models.getPropertyString(metadata, column, CSVW.FORMAT).ifPresent(v -> parser.setFormat(v));

Models.getPropertyString(metadata, column, CSVW.VALUE_URL).ifPresent(v -> parser.setValueURL(v));

// use a property from a vocabulary as predicate, or create a property relative to the namespace of the CSV
Expand All @@ -238,7 +247,7 @@ private CellParser getCellParser(Model metadata, Resource column) {
}

/**
* Get IRI of base or derived datatype
* Get name of base or derived datatype
*
* @param metadata
* @param column
Expand All @@ -263,26 +272,6 @@ private IRI getDatatypeIRI(Model metadata, Resource column) {
return XSD.valueOf(datatype.stringValue().toUpperCase()).getIri();
}

/**
 * Get the value of the column's datatype, resolving a derived datatype when needed.
 *
 * When the datatype of the column is a blank node (derived datatype), the format property of that node is
 * looked up, and then the base property of the format node. When any of these links is missing, or the
 * format is not a resource, an empty result is returned instead of failing with an exception.
 *
 * NOTE(review): following FORMAT before BASE looks unusual for a derived datatype, confirm that this is
 * the intended traversal.
 *
 * @param metadata metadata model
 * @param column   column to get the datatype / format for
 * @return datatype value, base of the derived datatype, or empty when it cannot be resolved
 */
private Optional<Value> getFormat(Model metadata, Resource column) {
	Optional<Value> val = Models.getProperty(metadata, column, CSVW.DATATYPE);
	if (val.isPresent() && val.get().isBNode()) {
		// derived datatype
		Resource derived = (Resource) val.get();
		return Models.getProperty(metadata, derived, CSVW.FORMAT)
				// a literal format cannot be used as subject of a further lookup
				.filter(Resource.class::isInstance)
				.map(Resource.class::cast)
				.flatMap(fmt -> Models.getProperty(metadata, fmt, CSVW.BASE));
	}
	return val;
}

/**
* Get "about" URL template, to be used to create the subject of the triples
*
Expand Down Expand Up @@ -376,7 +365,7 @@ private CSVReader getCSVReader(Model metadata, Resource table, Reader reader) {
Models.getPropertyString(metadata, (Resource) dialect.get(), CSVW.DELIMITER)
.ifPresent(v -> parserBuilder.withSeparator(v.charAt(0)));
Models.getPropertyString(metadata, (Resource) dialect.get(), CSVW.HEADER)
.ifPresent(v -> builder.withSkipLines(v.equalsIgnoreCase("false") ? 1 : 0));
.ifPresent(v -> builder.withSkipLines(v.equalsIgnoreCase("false") ? 0 : 1));
Models.getPropertyString(metadata, (Resource) dialect.get(), CSVW.QUOTE_CHAR)
.ifPresent(v -> parserBuilder.withQuoteChar(v.charAt(0)));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public class CellParserDate extends CellParser {
/**
 * Set the format and build the date formatter from it.
 *
 * @param format pattern as accepted by {@link DateTimeFormatter#ofPattern(String)}
 * @throws IllegalArgumentException if the pattern is invalid
 */
@Override
public void setFormat(String format) {
	super.setFormat(format);
	// no debug output on stderr: a parser library should not write to the console of the caller
	formatter = DateTimeFormatter.ofPattern(format);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@
*******************************************************************************/
package org.eclipse.rdf4j.rio.csvw.parsers;

import java.util.Set;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Namespace;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.util.Values;

Expand Down
2 changes: 1 addition & 1 deletion core/rio/csvw/src/test/resources/painters-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"base": "date",
"format": "d/M/yyyy"
} },
{ "name": "maried",
{ "name": "married",
"datatype": {
"base": "boolean",
"format": "Yes|No"
Expand Down

0 comments on commit f335478

Please sign in to comment.