diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharacterNotice.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharacterNotice.java
new file mode 100644
index 0000000000..428e09475a
--- /dev/null
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharacterNotice.java
@@ -0,0 +1,33 @@
+package org.mobilitydata.gtfsvalidator.notice;
+
+import static org.mobilitydata.gtfsvalidator.notice.SeverityLevel.ERROR;
+
+import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice;
+
+/**
+ * This field contains invalid characters, such as the replacement character ("\uFFFD").
+ *
+ * <p>Check that text was properly encoded in UTF-8 as required by GTFS.
+ */
+@GtfsValidationNotice(severity = ERROR)
+public class InvalidCharacterNotice extends ValidationNotice {
+  /** The name of the file containing the invalid characters. */
+  private final String filename;
+
+  /** The row number in the CSV file where the invalid characters were found. */
+  private final long csvRowNumber;
+
+  /** The name of the field containing the invalid characters. */
+  private final String fieldName;
+
+  /** The value of the field containing the invalid characters. */
+  private final String fieldValue;
+
+  public InvalidCharacterNotice(
+      String filename, long csvRowNumber, String fieldName, String fieldValue) {
+    this.filename = filename;
+    this.csvRowNumber = csvRowNumber;
+    this.fieldName = fieldName;
+    this.fieldValue = fieldValue;
+  }
+}
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java
index e29f7b53c5..5a1f5bb469 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java
@@ -24,6 +24,7 @@
 import javax.annotation.Nullable;
 import org.mobilitydata.gtfsvalidator.annotation.FieldLevelEnum;
 import org.mobilitydata.gtfsvalidator.notice.EmptyRowNotice;
+import org.mobilitydata.gtfsvalidator.notice.InvalidCharacterNotice;
 import org.mobilitydata.gtfsvalidator.notice.InvalidColorNotice;
 import org.mobilitydata.gtfsvalidator.notice.InvalidCurrencyNotice;
 import org.mobilitydata.gtfsvalidator.notice.InvalidDateNotice;
@@ -137,6 +138,12 @@ public String asString(int columnIndex, GtfsColumnDescriptor columnDescriptor) {
               fileName, getRowNumber(), columnDescriptor.columnName()));
     }
     if (s != null) {
+      // Report values that contain the replacement character before field-level validation runs.
+      if (containsInvalidCharacters(s)) {
+        noticeContainer.addValidationNotice(
+            new InvalidCharacterNotice(fileName, getRowNumber(), columnDescriptor.columnName(), s));
+      }
+
       s =
           fieldValidator.validateField(
               s,
@@ -146,6 +153,11 @@ public String asString(int columnIndex, GtfsColumnDescriptor columnDescriptor) {
     return s;
   }
 
+  /** Returns true if {@code string} contains the Unicode replacement character (U+FFFD). */
+  private boolean containsInvalidCharacters(String string) {
+    return string.contains("\uFFFD");
+  }
+
   @Nullable
   public String asText(int columnIndex, GtfsColumnDescriptor columnDescriptor) {
     return asString(columnIndex, columnDescriptor);