From cace781476c6f57b0eb163915a960551b3f70750 Mon Sep 17 00:00:00 2001 From: Jingsi Lu Date: Wed, 16 Oct 2024 11:05:58 -0400 Subject: [PATCH 1/4] added check for invalid characters in asString method --- .../notice/InvalidCharactersNotice.java | 35 +++++++++++++++++++ .../gtfsvalidator/parsing/RowParser.java | 31 ++++++++-------- 2 files changed, 49 insertions(+), 17 deletions(-) create mode 100644 core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java new file mode 100644 index 0000000000..3c3ea177d0 --- /dev/null +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java @@ -0,0 +1,35 @@ +package org.mobilitydata.gtfsvalidator.notice; + +import static org.mobilitydata.gtfsvalidator.notice.SeverityLevel.ERROR; + +import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice; + +/** + * Invalid characters. + * + *
<p>
This field contains invalid characters such as the replacement character (U+FFFD). Fields with + * customer-facing text should not contain invalid characters to ensure good readability and + * accessibility. + */ +@GtfsValidationNotice(severity = ERROR) +public class InvalidCharactersNotice extends ValidationNotice { + /** The name of the file containing the invalid characters. */ + private final String filename; + + /** The row number in the CSV file where the invalid characters were found. */ + private final long csvRowNumber; + + /** The name of the field containing the invalid characters. */ + private final String fieldName; + + /** The value of the field containing the invalid characters. */ + private final String fieldValue; + + public InvalidCharactersNotice( + String filename, long csvRowNumber, String fieldName, String fieldValue) { + this.filename = filename; + this.csvRowNumber = csvRowNumber; + this.fieldName = fieldName; + this.fieldValue = fieldValue; + } +} diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java index e29f7b53c5..50c7ba704e 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java @@ -23,23 +23,7 @@ import java.util.function.Function; import javax.annotation.Nullable; import org.mobilitydata.gtfsvalidator.annotation.FieldLevelEnum; -import org.mobilitydata.gtfsvalidator.notice.EmptyRowNotice; -import org.mobilitydata.gtfsvalidator.notice.InvalidColorNotice; -import org.mobilitydata.gtfsvalidator.notice.InvalidCurrencyNotice; -import org.mobilitydata.gtfsvalidator.notice.InvalidDateNotice; -import org.mobilitydata.gtfsvalidator.notice.InvalidFloatNotice; -import org.mobilitydata.gtfsvalidator.notice.InvalidIntegerNotice; -import org.mobilitydata.gtfsvalidator.notice.InvalidLanguageCodeNotice; -import 
org.mobilitydata.gtfsvalidator.notice.InvalidRowLengthNotice; -import org.mobilitydata.gtfsvalidator.notice.InvalidTimeNotice; -import org.mobilitydata.gtfsvalidator.notice.InvalidTimezoneNotice; -import org.mobilitydata.gtfsvalidator.notice.MissingRecommendedFieldNotice; -import org.mobilitydata.gtfsvalidator.notice.MissingRequiredFieldNotice; -import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; -import org.mobilitydata.gtfsvalidator.notice.NumberOutOfRangeNotice; -import org.mobilitydata.gtfsvalidator.notice.TooManyRowsNotice; -import org.mobilitydata.gtfsvalidator.notice.UnexpectedEnumValueNotice; -import org.mobilitydata.gtfsvalidator.notice.ValidationNotice; +import org.mobilitydata.gtfsvalidator.notice.*; import org.mobilitydata.gtfsvalidator.table.GtfsColumnDescriptor; import org.mobilitydata.gtfsvalidator.table.GtfsEnum; import org.mobilitydata.gtfsvalidator.type.GtfsColor; @@ -137,6 +121,15 @@ public String asString(int columnIndex, GtfsColumnDescriptor columnDescriptor) { fileName, getRowNumber(), columnDescriptor.columnName())); } if (s != null) { + // Validate if the string contains invalid characters + if (fileName.equals("stops.txt")) { + if (containsInvalidCharacters(s)) { + noticeContainer.addValidationNotice( + new InvalidCharactersNotice( + fileName, getRowNumber(), columnDescriptor.columnName(), s)); + return null; + } + } s = fieldValidator.validateField( s, @@ -146,6 +139,10 @@ public String asString(int columnIndex, GtfsColumnDescriptor columnDescriptor) { return s; } + private boolean containsInvalidCharacters(String string) { + return string.contains("\uFFFD"); + } + @Nullable public String asText(int columnIndex, GtfsColumnDescriptor columnDescriptor) { return asString(columnIndex, columnDescriptor); From 84a7fdeb9b84d0a0501d79a62f03c1499cab0bcd Mon Sep 17 00:00:00 2001 From: Jingsi Lu Date: Wed, 16 Oct 2024 11:21:45 -0400 Subject: [PATCH 2/4] formatted code --- .../notice/InvalidCharactersNotice.java | 7 +++---- 
.../gtfsvalidator/parsing/RowParser.java | 14 +++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java index 3c3ea177d0..311eb07b3b 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java @@ -5,11 +5,10 @@ import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice; /** - * Invalid characters. + * This field contains invalid characters such as the replacement character ("\uFFFD"). * - *
<p>
This field contains invalid characters such as the replacement character (U+FFFD). Fields with - * customer-facing text should not contain invalid characters to ensure good readability and - * accessibility. + *
<p>
Fields with customer-facing text should not contain invalid characters to ensure good + * readability and accessibility. */ @GtfsValidationNotice(severity = ERROR) public class InvalidCharactersNotice extends ValidationNotice { diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java index 50c7ba704e..a46a1084ab 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java @@ -122,14 +122,14 @@ public String asString(int columnIndex, GtfsColumnDescriptor columnDescriptor) { } if (s != null) { // Validate if the string contains invalid characters - if (fileName.equals("stops.txt")) { - if (containsInvalidCharacters(s)) { - noticeContainer.addValidationNotice( - new InvalidCharactersNotice( - fileName, getRowNumber(), columnDescriptor.columnName(), s)); - return null; - } + + if (containsInvalidCharacters(s)) { + noticeContainer.addValidationNotice( + new InvalidCharactersNotice( + fileName, getRowNumber(), columnDescriptor.columnName(), s)); + return null; } + s = fieldValidator.validateField( s, From aba5d263293fc934641a3f1307f0152a3812bea3 Mon Sep 17 00:00:00 2001 From: Jingsi Lu Date: Wed, 16 Oct 2024 13:20:16 -0400 Subject: [PATCH 3/4] removed return null; --- .../java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java index a46a1084ab..d476b2d787 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java @@ -122,12 +122,10 @@ public String asString(int columnIndex, GtfsColumnDescriptor columnDescriptor) { } if (s != null) { // Validate if the string 
contains invalid characters - if (containsInvalidCharacters(s)) { noticeContainer.addValidationNotice( new InvalidCharactersNotice( fileName, getRowNumber(), columnDescriptor.columnName(), s)); - return null; } s = From 2a2628f834520189f3f9c1de21dc3c4998ce17b1 Mon Sep 17 00:00:00 2001 From: Jingsi Lu Date: Wed, 16 Oct 2024 18:41:21 -0400 Subject: [PATCH 4/4] InvalidCharacterNotice changes based on requirement --- ...CharactersNotice.java => InvalidCharacterNotice.java} | 9 ++++----- .../mobilitydata/gtfsvalidator/parsing/RowParser.java | 3 +-- 2 files changed, 5 insertions(+), 7 deletions(-) rename core/src/main/java/org/mobilitydata/gtfsvalidator/notice/{InvalidCharactersNotice.java => InvalidCharacterNotice.java} (73%) diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharacterNotice.java similarity index 73% rename from core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java rename to core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharacterNotice.java index 311eb07b3b..428e09475a 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharactersNotice.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/InvalidCharacterNotice.java @@ -5,13 +5,12 @@ import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice; /** - * This field contains invalid characters such as the replacement character ("\uFFFD"). + * This field contains invalid characters, such as the replacement character ("\uFFFD"). * - *
<p>
Fields with customer-facing text should not contain invalid characters to ensure good - * readability and accessibility. + *
<p>
Check that text was properly encoded in UTF-8 as required by GTFS. */ @GtfsValidationNotice(severity = ERROR) -public class InvalidCharactersNotice extends ValidationNotice { +public class InvalidCharacterNotice extends ValidationNotice { /** The name of the file containing the invalid characters. */ private final String filename; @@ -24,7 +23,7 @@ public class InvalidCharactersNotice extends ValidationNotice { /** The value of the field containing the invalid characters. */ private final String fieldValue; - public InvalidCharactersNotice( + public InvalidCharacterNotice( String filename, long csvRowNumber, String fieldName, String fieldValue) { this.filename = filename; this.csvRowNumber = csvRowNumber; diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java index d476b2d787..5a1f5bb469 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/parsing/RowParser.java @@ -124,8 +124,7 @@ public String asString(int columnIndex, GtfsColumnDescriptor columnDescriptor) { // Validate if the string contains invalid characters if (containsInvalidCharacters(s)) { noticeContainer.addValidationNotice( - new InvalidCharactersNotice( - fileName, getRowNumber(), columnDescriptor.columnName(), s)); + new InvalidCharacterNotice(fileName, getRowNumber(), columnDescriptor.columnName(), s)); } s =