From f16f52e0d781f8c5fa99ca2f2e45c9fcd057de63 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Mon, 30 Sep 2024 16:33:06 -0400
Subject: [PATCH 01/30] add memory usage records to the JSON report
---
core/build.gradle | 3 +
.../performance/MemoryMonitor.java | 12 ++
.../performance/MemoryMonitorAspect.java | 33 ++++++
.../performance/MemoryUsage.java | 69 +++++++++++
.../performance/MemoryUsageRegister.java | 56 +++++++++
.../gtfsvalidator/table/GtfsFeedLoader.java | 109 ++++++++++++------
.../report/JsonReportSummary.java | 3 +
.../report/model/FeedMetadata.java | 4 +
.../runner/ValidationRunner.java | 12 ++
9 files changed, 267 insertions(+), 34 deletions(-)
create mode 100644 core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java
create mode 100644 core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java
create mode 100644 core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
create mode 100644 core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java
diff --git a/core/build.gradle b/core/build.gradle
index cccd38adc9..1e02f6c615 100644
--- a/core/build.gradle
+++ b/core/build.gradle
@@ -18,6 +18,7 @@ plugins {
id 'java'
id 'maven-publish'
id 'signing'
+ id 'io.freefair.aspectj.post-compile-weaving' version '6.4.1' // Add AspectJ plugin
}
//publishing {
@@ -42,6 +43,8 @@ dependencies {
implementation 'com.googlecode.libphonenumber:libphonenumber:8.12.13'
implementation 'com.google.flogger:flogger:0.6'
implementation 'io.github.classgraph:classgraph:4.8.146'
+ implementation 'org.aspectj:aspectjrt:1.9.22.1'
+ implementation 'org.aspectj:aspectjweaver:1.9.22.1'
testImplementation 'com.google.flogger:flogger-system-backend:0.6'
testImplementation group: 'junit', name: 'junit', version: '4.13'
testImplementation "com.google.truth:truth:1.0.1"
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java
new file mode 100644
index 0000000000..179ce3cd78
--- /dev/null
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java
@@ -0,0 +1,12 @@
+package org.mobilitydata.gtfsvalidator.performance;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Marker annotation for methods whose execution is intercepted by {@code MemoryMonitorAspect} in
+ * order to record a memory usage snapshot.
+ *
+ * <p>Applicable to methods only and retained at runtime.
+ */
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+public @interface MemoryMonitor {
+ /**
+ * Key under which the measurement is recorded. Defaults to an empty string, in which case the
+ * aspect falls back to the declaring class name and method name.
+ */
+ String key() default "";
+}
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java
new file mode 100644
index 0000000000..f6baf95945
--- /dev/null
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java
@@ -0,0 +1,33 @@
+package org.mobilitydata.gtfsvalidator.performance;
+
+import org.apache.commons.lang3.StringUtils;
+import org.aspectj.lang.ProceedingJoinPoint;
+import org.aspectj.lang.annotation.Around;
+import org.aspectj.lang.annotation.Aspect;
+import org.aspectj.lang.reflect.MethodSignature;
+
+/**
+ * AspectJ aspect that records a memory usage snapshot around the execution of every method
+ * annotated with {@link MemoryMonitor}.
+ */
+@Aspect
+public class MemoryMonitorAspect {
+
+  /**
+   * Takes a snapshot before the intercepted method runs and, once it has finished (normally or by
+   * throwing), registers a second snapshot carrying the difference to the first one.
+   *
+   * @param joinPoint the intercepted method execution
+   * @return the value returned by the intercepted method
+   * @throws Throwable anything thrown by the intercepted method
+   */
+  @Around("execution(@org.mobilitydata.gtfsvalidator.performance.MemoryMonitor * *(..))")
+  public Object monitorMemoryUsage(ProceedingJoinPoint joinPoint) throws Throwable {
+    final String key = extractKey(joinPoint);
+    final MemoryUsageRegister register = MemoryUsageRegister.getInstance();
+    final MemoryUsage before = register.getMemoryUsageSnapshot(key, null);
+    try {
+      return joinPoint.proceed();
+    } finally {
+      // Runs on both normal and exceptional completion so the measurement is never lost.
+      register.registerMemoryUsage(register.getMemoryUsageSnapshot(key, before));
+    }
+  }
+
+  /**
+   * Resolves the key of the measurement: the annotation's non-blank {@code key()} when present,
+   * otherwise {@code <declaring class canonical name>.<method name>}.
+   */
+  private String extractKey(ProceedingJoinPoint joinPoint) {
+    var signature = (MethodSignature) joinPoint.getSignature();
+    var target = signature.getMethod();
+    var annotation = target.getAnnotation(MemoryMonitor.class);
+    if (annotation != null && StringUtils.isNotBlank(annotation.key())) {
+      return annotation.key();
+    }
+    return target.getDeclaringClass().getCanonicalName() + "." + target.getName();
+  }
+}
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
new file mode 100644
index 0000000000..fb5982fb49
--- /dev/null
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
@@ -0,0 +1,69 @@
+package org.mobilitydata.gtfsvalidator.performance;
+
+import com.google.auto.value.AutoValue;
+import java.text.DecimalFormat;
+import javax.annotation.Nullable;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * Immutable value holding the memory figures captured at one point in time under a given key,
+ * plus an optional difference to an earlier capture.
+ */
+@AutoValue
+public abstract class MemoryUsage {
+  private static final DecimalFormat TWO_DECIMAL_FORMAT = new DecimalFormat("0.00");
+
+  private static final long KIB = 1024L;
+  private static final long MIB = KIB * 1024L;
+  private static final long GIB = MIB * 1024L;
+  private static final long TIB = GIB * 1024L;
+
+  /**
+   * Creates a new value.
+   *
+   * @param key label of the measurement
+   * @param totalMemory total memory, in bytes
+   * @param freeMemory free memory, in bytes
+   * @param maxMemory maximum memory, in bytes
+   * @param memoryDiff difference to an earlier capture, or {@code null} when there is none
+   * @return the new value
+   */
+  public static MemoryUsage create(
+      String key, long totalMemory, long freeMemory, long maxMemory, Long memoryDiff) {
+    return new AutoValue_MemoryUsage(key, totalMemory, freeMemory, maxMemory, memoryDiff);
+  }
+
+  /**
+   * Formats a byte count using binary units with two decimals.
+   *
+   * <p>NOTE(review): zero and negative sizes are both rendered as {@code "0"}, so a negative diff
+   * loses its magnitude; callers that need it must pass the absolute value.
+   *
+   * @param size number of bytes, may be {@code null}
+   * @return {@code "N/A"} for {@code null}, {@code "0"} for non-positive values, otherwise the
+   *     size expressed in bytes, KiB, MiB, GiB or TiB
+   */
+  public static String convertToHumanReadableMemory(Long size) {
+    if (size == null) {
+      return "N/A";
+    }
+    if (size <= 0) {
+      return "0";
+    }
+    if (size < KIB) {
+      return size + " bytes";
+    }
+    if (size < MIB) {
+      return TWO_DECIMAL_FORMAT.format(size / (double) KIB) + " KiB";
+    }
+    if (size < GIB) {
+      return TWO_DECIMAL_FORMAT.format(size / (double) MIB) + " MiB";
+    }
+    if (size < TIB) {
+      return TWO_DECIMAL_FORMAT.format(size / (double) GIB) + " GiB";
+    }
+    // Divide as double: long division would truncate the fractional part (1.5 TiB -> "1.00").
+    return TWO_DECIMAL_FORMAT.format(size / (double) TIB) + " TiB";
+  }
+
+  public abstract String key();
+
+  public abstract long totalMemory();
+
+  public abstract long freeMemory();
+
+  public abstract long maxMemory();
+
+  @Nullable
+  public abstract Long diffMemory();
+
+  /** Returns the used memory, computed as total memory minus free memory. */
+  public long usedMemory() {
+    return totalMemory() - freeMemory();
+  }
+
+  /** Returns a single-line, human readable description of this value, suitable for logging. */
+  public String humanReadablePrint() {
+    // StringBuilder: the buffer never leaves this method, so no synchronization is needed.
+    StringBuilder result = new StringBuilder();
+    result.append("Memory usage registered");
+    if (StringUtils.isNotBlank(key())) {
+      result.append(" for key: ").append(key());
+    } else {
+      result.append(":");
+    }
+    result.append(" Max: ").append(convertToHumanReadableMemory(maxMemory()));
+    result.append(" Total: ").append(convertToHumanReadableMemory(totalMemory()));
+    result.append(" Free: ").append(convertToHumanReadableMemory(freeMemory()));
+    result.append(" Used: ").append(convertToHumanReadableMemory(usedMemory()));
+    result.append(" Diff: ").append(convertToHumanReadableMemory(diffMemory()));
+    return result.toString();
+  }
+}
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java
new file mode 100644
index 0000000000..38c20b8baf
--- /dev/null
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java
@@ -0,0 +1,56 @@
+package org.mobilitydata.gtfsvalidator.performance;
+
+import com.google.common.flogger.FluentLogger;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Singleton that takes {@link MemoryUsage} snapshots from the JVM {@link Runtime} and keeps the
+ * registered ones in an in-memory list.
+ *
+ * <p>NOTE(review): the registry is a plain {@link ArrayList} with no synchronization; confirm
+ * that registration only ever happens from a single thread.
+ */
+public class MemoryUsageRegister {
+  private static final FluentLogger logger = FluentLogger.forEnclosingClass();
+
+  private static final MemoryUsageRegister instance = new MemoryUsageRegister();
+  private final Runtime runtime;
+  private final List<MemoryUsage> registry = new ArrayList<>();
+
+  private MemoryUsageRegister() {
+    runtime = Runtime.getRuntime();
+  }
+
+  /** Returns the single shared instance. */
+  public static MemoryUsageRegister getInstance() {
+    return instance;
+  }
+
+  /** Returns a read-only view of the snapshots registered so far. */
+  public List<MemoryUsage> getRegistry() {
+    return Collections.unmodifiableList(registry);
+  }
+
+  /**
+   * Captures the current memory figures without registering them.
+   *
+   * @param key label of the snapshot
+   * @param previous earlier snapshot to diff against, or {@code null}
+   * @return the snapshot; its diff is the current free memory minus {@code previous}' free
+   *     memory, or {@code null} when {@code previous} is {@code null}
+   */
+  public MemoryUsage getMemoryUsageSnapshot(String key, MemoryUsage previous) {
+    // Read free memory once so the stored value and the diff describe the same instant.
+    long freeMemory = runtime.freeMemory();
+    Long memoryDiff = null;
+    if (previous != null) {
+      memoryDiff = freeMemory - previous.freeMemory();
+    }
+    return MemoryUsage.create(
+        key, runtime.totalMemory(), freeMemory, runtime.maxMemory(), memoryDiff);
+  }
+
+  /** Captures a snapshot under {@code key}, registers it and returns it. */
+  public MemoryUsage registerMemoryUsage(String key) {
+    MemoryUsage memoryUsage = getMemoryUsageSnapshot(key, null);
+    registerMemoryUsage(memoryUsage);
+    return memoryUsage;
+  }
+
+  /** Captures a snapshot under {@code key} diffed against {@code previous}, registers it. */
+  public MemoryUsage registerMemoryUsage(String key, MemoryUsage previous) {
+    MemoryUsage memoryUsage = getMemoryUsageSnapshot(key, previous);
+    registerMemoryUsage(memoryUsage);
+    return memoryUsage;
+  }
+
+  /** Appends {@code memoryUsage} to the registry and logs it at INFO level. */
+  public void registerMemoryUsage(MemoryUsage memoryUsage) {
+    registry.add(memoryUsage);
+    logger.atInfo().log(memoryUsage.humanReadablePrint());
+  }
+
+  /** Removes every registered snapshot. */
+  public void clearRegistry() {
+    registry.clear();
+  }
+}
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java
index 1edb051184..4dffea4ba0 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java
@@ -35,6 +35,8 @@
import org.mobilitydata.gtfsvalidator.notice.RuntimeExceptionInLoaderError;
import org.mobilitydata.gtfsvalidator.notice.ThreadExecutionError;
import org.mobilitydata.gtfsvalidator.notice.UnknownFileNotice;
+import org.mobilitydata.gtfsvalidator.performance.MemoryMonitor;
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsageRegister;
import org.mobilitydata.gtfsvalidator.validator.FileValidator;
import org.mobilitydata.gtfsvalidator.validator.ValidatorProvider;
import org.mobilitydata.gtfsvalidator.validator.ValidatorUtil;
@@ -90,6 +92,7 @@ public List> getMultiFileValidatorsWithParsingErr
}
@SuppressWarnings("unchecked")
+ @MemoryMonitor()
public GtfsFeedContainer loadAndValidate(
GtfsInput gtfsInput, ValidatorProvider validatorProvider, NoticeContainer noticeContainer)
throws InterruptedException {
@@ -137,47 +140,85 @@ public GtfsFeedContainer loadAndValidate(
AnyTableLoader.loadMissingFile(tableDescriptor, validatorProvider, noticeContainer));
}
try {
- for (Future futureContainer : exec.invokeAll(loaderCallables)) {
- try {
- TableAndNoticeContainers containers = futureContainer.get();
- tableContainers.add(containers.tableContainer);
- noticeContainer.addAll(containers.noticeContainer);
- } catch (ExecutionException e) {
- // All runtime exceptions should be caught above.
- // ExecutionException is not expected to happen.
- addThreadExecutionError(e, noticeContainer);
- }
- }
+ var beforeLoading =
+ MemoryUsageRegister.getInstance()
+ .getMemoryUsageSnapshot("GtfsFeedLoader.loadTables", null);
+ loadTables(noticeContainer, exec, loaderCallables, tableContainers);
+ MemoryUsageRegister.getInstance()
+ .registerMemoryUsage("GtfsFeedLoader.loadTables", beforeLoading);
+
GtfsFeedContainer feed = new GtfsFeedContainer(tableContainers);
- List> validatorCallables = new ArrayList<>();
- // Validators with parser-error dependencies will not be returned here, but instead added to
- // the skippedValidators list.
- for (FileValidator validator :
- validatorProvider.createMultiFileValidators(
- feed, multiFileValidatorsWithParsingErrors::add)) {
- validatorCallables.add(
- () -> {
- NoticeContainer validatorNotices = new NoticeContainer();
- ValidatorUtil.safeValidate(
- validator::validate, validator.getClass(), validatorNotices);
- return validatorNotices;
- });
- }
- for (Future futureContainer : exec.invokeAll(validatorCallables)) {
- try {
- noticeContainer.addAll(futureContainer.get());
- } catch (ExecutionException e) {
- // All runtime exceptions should be caught above.
- // ExecutionException is not expected to happen.
- addThreadExecutionError(e, noticeContainer);
- }
- }
+ var beforeMultiFileValidators =
+ MemoryUsageRegister.getInstance()
+ .getMemoryUsageSnapshot("GtfsFeedLoader.executeMultiFileValidators", null);
+ executeMultiFileValidators(validatorProvider, noticeContainer, feed, exec);
+ MemoryUsageRegister.getInstance()
+ .registerMemoryUsage(
+ "GtfsFeedLoader.executeMultiFileValidators", beforeMultiFileValidators);
+
return feed;
} finally {
exec.shutdown();
}
}
+ /**
+ * Runs every loader callable on {@code exec} and collects the results: each loaded table
+ * container is appended to {@code tableContainers} and its notices are merged into {@code
+ * noticeContainer}.
+ *
+ * <p>An {@link ExecutionException} raised while fetching a result is reported through {@code
+ * addThreadExecutionError} instead of being propagated.
+ *
+ * @throws InterruptedException as declared by the executor and future calls made here
+ */
+ private static void loadTables(
+ NoticeContainer noticeContainer,
+ ExecutorService exec,
+ List> loaderCallables,
+ ArrayList> tableContainers)
+ throws InterruptedException {
+ for (Future futureContainer : exec.invokeAll(loaderCallables)) {
+ try {
+ TableAndNoticeContainers containers = futureContainer.get();
+ tableContainers.add(containers.tableContainer);
+ noticeContainer.addAll(containers.noticeContainer);
+ } catch (ExecutionException e) {
+ // All runtime exceptions should be caught above.
+ // ExecutionException is not expected to happen.
+ addThreadExecutionError(e, noticeContainer);
+ }
+ }
+ }
+
+ /**
+ * Creates the multi-file validators for {@code feed}, wraps each one in a callable that
+ * validates into its own fresh {@link NoticeContainer} via {@code ValidatorUtil.safeValidate},
+ * and hands the callables to {@code collectMultiFileValidationNotices} for execution.
+ *
+ * @throws InterruptedException as declared by {@code collectMultiFileValidationNotices}
+ */
+ private void executeMultiFileValidators(
+ ValidatorProvider validatorProvider,
+ NoticeContainer noticeContainer,
+ GtfsFeedContainer feed,
+ ExecutorService exec)
+ throws InterruptedException {
+ List> validatorCallables = new ArrayList<>();
+ // Validators with parser-error dependencies will not be returned here, but instead added to
+ // the skippedValidators list.
+ for (FileValidator validator :
+ validatorProvider.createMultiFileValidators(
+ feed, multiFileValidatorsWithParsingErrors::add)) {
+ validatorCallables.add(
+ () -> {
+ NoticeContainer validatorNotices = new NoticeContainer();
+ ValidatorUtil.safeValidate(validator::validate, validator.getClass(), validatorNotices);
+ return validatorNotices;
+ });
+ }
+ collectMultiFileValidationNotices(noticeContainer, exec, validatorCallables);
+ }
+
+ /**
+ * Runs every validator callable on {@code exec} and merges the notices each one produced into
+ * {@code noticeContainer}.
+ *
+ * <p>An {@link ExecutionException} raised while fetching a result is reported through {@code
+ * addThreadExecutionError} instead of being propagated.
+ *
+ * @throws InterruptedException as declared by the executor and future calls made here
+ */
+ private static void collectMultiFileValidationNotices(
+ NoticeContainer noticeContainer,
+ ExecutorService exec,
+ List> validatorCallables)
+ throws InterruptedException {
+ for (Future futureContainer : exec.invokeAll(validatorCallables)) {
+ try {
+ noticeContainer.addAll(futureContainer.get());
+ } catch (ExecutionException e) {
+ // All runtime exceptions should be caught above.
+ // ExecutionException is not expected to happen.
+ addThreadExecutionError(e, noticeContainer);
+ }
+ }
+ }
+
/** Adds a ThreadExecutionError to the notice container. */
private static void addThreadExecutionError(
ExecutionException e, NoticeContainer noticeContainer) {
diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java
index b5ba4ed1a9..4e79ff3faa 100644
--- a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java
+++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java
@@ -6,6 +6,7 @@
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
import org.mobilitydata.gtfsvalidator.report.model.AgencyMetadata;
import org.mobilitydata.gtfsvalidator.report.model.FeedMetadata;
import org.mobilitydata.gtfsvalidator.runner.ValidationRunnerConfig;
@@ -34,6 +35,7 @@ public class JsonReportSummary {
private List agencies;
private Set files;
private Double validationTimeSeconds;
+ public List memoryUsageRecords;
@SerializedName("counts")
private JsonReportCounts jsonReportCounts;
@@ -67,6 +69,7 @@ public JsonReportSummary(
if (feedMetadata.feedInfo != null) {
this.feedInfo = new JsonReportFeedInfo(feedMetadata.feedInfo);
this.validationTimeSeconds = feedMetadata.validationTimeSeconds;
+ this.memoryUsageRecords = feedMetadata.memoryUsageRecords;
} else {
logger.atSevere().log(
"No feed info for feed "
diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java
index b2a4813eee..9292d644c4 100644
--- a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java
+++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java
@@ -8,6 +8,8 @@
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.function.Function;
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsageRegister;
import org.mobilitydata.gtfsvalidator.table.*;
import org.mobilitydata.gtfsvalidator.util.CalendarUtil;
import org.mobilitydata.gtfsvalidator.util.ServicePeriod;
@@ -54,6 +56,7 @@ public class FeedMetadata {
public double validationTimeSeconds;
+ public List memoryUsageRecords;
// List of features that only require checking the presence of one record in the file.
private final List> FILE_BASED_FEATURES =
List.of(
@@ -110,6 +113,7 @@ public static FeedMetadata from(GtfsFeedContainer feedContainer, ImmutableSet
Date: Mon, 30 Sep 2024 17:04:51 -0400
Subject: [PATCH 02/30] downgrade aspectj dependencies to be compatible with jdk
11
---
core/build.gradle | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/core/build.gradle b/core/build.gradle
index 1e02f6c615..019cba6bad 100644
--- a/core/build.gradle
+++ b/core/build.gradle
@@ -18,7 +18,7 @@ plugins {
id 'java'
id 'maven-publish'
id 'signing'
- id 'io.freefair.aspectj.post-compile-weaving' version '6.4.1' // Add AspectJ plugin
+ id 'io.freefair.aspectj.post-compile-weaving' version '6.4.1'
}
//publishing {
@@ -43,8 +43,8 @@ dependencies {
implementation 'com.googlecode.libphonenumber:libphonenumber:8.12.13'
implementation 'com.google.flogger:flogger:0.6'
implementation 'io.github.classgraph:classgraph:4.8.146'
- implementation 'org.aspectj:aspectjrt:1.9.22.1'
- implementation 'org.aspectj:aspectjweaver:1.9.22.1'
+ implementation 'org.aspectj:aspectjrt:1.9.20'
+ implementation 'org.aspectj:aspectjweaver:1.9.20'
testImplementation 'com.google.flogger:flogger-system-backend:0.6'
testImplementation group: 'junit', name: 'junit', version: '4.13'
testImplementation "com.google.truth:truth:1.0.1"
From 5f6c71b28f2314e53d3b91fdbf5bdf18ed80f1d1 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Tue, 1 Oct 2024 16:57:12 -0400
Subject: [PATCH 03/30] add memory usage to validator comparator
---
.../io/ValidationReportDeserializer.java | 19 ++-
.../gtfsvalidator/model/ValidationReport.java | 14 +-
.../io/BoundedPriorityQueue.java | 56 +++++++
.../io/DatasetMemoryUsage.java | 47 ++++++
.../io/MemoryUsageUsedMemoryComparator.java | 30 ++++
.../io/ValidationPerformanceCollector.java | 139 ++++++++++++++++++
.../ValidationPerformanceCollectorTest.java | 65 +++++++-
7 files changed, 356 insertions(+), 14 deletions(-)
create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java
create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java
index 3197be8f81..d088be3285 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java
@@ -23,16 +23,13 @@
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import java.lang.reflect.Type;
-import java.util.Collection;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
import org.mobilitydata.gtfsvalidator.model.NoticeReport;
import org.mobilitydata.gtfsvalidator.model.ValidationReport;
import org.mobilitydata.gtfsvalidator.notice.Notice;
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;
import org.mobilitydata.gtfsvalidator.notice.ResolvedNotice;
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
/**
* Used to (de)serialize a JSON validation report. This represents a validation report as a list of
@@ -44,6 +41,7 @@ public class ValidationReportDeserializer implements JsonDeserializer memoryUsageRecords = null;
if (rootObject.has(SUMMARY_MEMBER_NAME)) {
JsonObject summaryObject = rootObject.getAsJsonObject(SUMMARY_MEMBER_NAME);
if (summaryObject.has(VALIDATION_TIME_MEMBER_NAME)) {
validationTimeSeconds = summaryObject.get(VALIDATION_TIME_MEMBER_NAME).getAsDouble();
}
+ if (summaryObject.has(MEMORY_USAGE_RECORDS_MEMBER_NAME)) {
+ JsonArray memoryUsageArray = summaryObject.getAsJsonArray(MEMORY_USAGE_RECORDS_MEMBER_NAME);
+ memoryUsageRecords = new ArrayList<>();
+ for (JsonElement element : memoryUsageArray) {
+ MemoryUsage memoryUsage = Notice.GSON.fromJson(element, MemoryUsage.class);
+ memoryUsageRecords.add(memoryUsage);
+ }
+ }
}
JsonArray noticesArray = rootObject.getAsJsonArray(NOTICES_MEMBER_NAME);
for (JsonElement childObject : noticesArray) {
notices.add(Notice.GSON.fromJson(childObject, NoticeReport.class));
}
- return new ValidationReport(notices, validationTimeSeconds);
+ return new ValidationReport(notices, validationTimeSeconds, memoryUsageRecords);
}
public static JsonObject serialize(
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java
index 5ea4c76a35..c1a3da670e 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java
@@ -23,8 +23,10 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
+import java.util.List;
import java.util.Set;
import org.mobilitydata.gtfsvalidator.io.ValidationReportDeserializer;
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
/**
* Used to (de)serialize a {@code NoticeContainer}. This represents a validation report as a list of
@@ -42,6 +44,7 @@ public class ValidationReport {
.create();
private final Set notices;
private final Double validationTimeSeconds;
+ private List memoryUsageRecords;
/**
* Public constructor needed for deserialization by {@code ValidationReportDeserializer}. Only
@@ -50,7 +53,7 @@ public class ValidationReport {
* @param noticeReports set of {@code NoticeReport}s
*/
public ValidationReport(Set noticeReports) {
- this(noticeReports, null);
+ this(noticeReports, null, null);
}
/**
@@ -60,9 +63,13 @@ public ValidationReport(Set noticeReports) {
* @param noticeReports set of {@code NoticeReport}s
* @param validationTimeSeconds the time taken to validate the GTFS dataset
*/
- public ValidationReport(Set noticeReports, Double validationTimeSeconds) {
+ public ValidationReport(
+ Set noticeReports,
+ Double validationTimeSeconds,
+ List memoryUsageRecords) {
this.notices = Collections.unmodifiableSet(noticeReports);
this.validationTimeSeconds = validationTimeSeconds;
+ this.memoryUsageRecords = memoryUsageRecords;
}
/**
@@ -86,6 +93,9 @@ public Double getValidationTimeSeconds() {
return validationTimeSeconds;
}
+ /**
+ * Returns the memory usage records given to the constructor.
+ *
+ * @return the records as stored; {@code null} when none were provided
+ */
+ public List getMemoryUsageRecords() {
+ return memoryUsageRecords;
+ }
/**
* Determines if two validation reports are equal regardless of the order of the fields in the set
* of {@code NoticeReport}.
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java
new file mode 100644
index 0000000000..8e49308ffd
--- /dev/null
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java
@@ -0,0 +1,56 @@
+package org.mobilitydata.gtfsvalidator.outputcomparator.io;
+
+import java.util.Comparator;
+import java.util.PriorityQueue;
+
+/**
+ * A priority queue holding at most {@code maxCapacity} elements: it retains the greatest elements
+ * offered to it, as ordered by its comparator or, when none was given, by the elements' natural
+ * ordering.
+ *
+ * <p>The head of the queue is the least of the retained elements. Once the queue is full, an
+ * offered element is accepted only if it is strictly greater than the head, in which case the
+ * head is evicted to make room; otherwise the offer is rejected. To retain the smallest elements
+ * instead, construct the queue with a reversed comparator.
+ *
+ * @param <E> the type of elements held in this queue
+ */
+public class BoundedPriorityQueue<E> extends PriorityQueue<E> {
+  private final int maxCapacity;
+
+  /**
+   * Creates a queue ordered by the natural ordering of its elements.
+   *
+   * @param maxCapacity maximum number of retained elements, must be positive
+   * @throws IllegalArgumentException if {@code maxCapacity} is not positive
+   */
+  public BoundedPriorityQueue(int maxCapacity) {
+    super();
+    this.maxCapacity = requirePositive(maxCapacity);
+  }
+
+  /**
+   * Creates a queue ordered by {@code comparator}.
+   *
+   * @param maxCapacity maximum number of retained elements, must be positive
+   * @param initialCapacity initial capacity of the backing {@link PriorityQueue}
+   * @param comparator ordering to use, or {@code null} for natural ordering
+   * @throws IllegalArgumentException if {@code maxCapacity} is not positive
+   */
+  public BoundedPriorityQueue(
+      int maxCapacity, int initialCapacity, Comparator<? super E> comparator) {
+    super(initialCapacity, comparator);
+    this.maxCapacity = requirePositive(maxCapacity);
+  }
+
+  /** Validates the capacity bound shared by both constructors. */
+  private static int requirePositive(int maxCapacity) {
+    if (maxCapacity <= 0) {
+      throw new IllegalArgumentException("Max capacity must be greater than zero");
+    }
+    return maxCapacity;
+  }
+
+  /**
+   * Inserts {@code e}, evicting the current head when the queue is full and {@code e} is
+   * strictly greater than it.
+   *
+   * @return {@code true} if the element was added, {@code false} if the queue is full and {@code
+   *     e} is not greater than the head
+   */
+  @Override
+  public boolean offer(E e) {
+    if (size() >= maxCapacity) {
+      E head = peek();
+      if (head == null || compare(e, head) <= 0) {
+        return false;
+      }
+      poll();
+    }
+    return super.offer(e);
+  }
+
+  /** Compares with the queue's comparator, falling back to natural ordering. */
+  @SuppressWarnings("unchecked")
+  private int compare(E a, E b) {
+    Comparator<? super E> ordering = comparator();
+    if (ordering != null) {
+      return ordering.compare(a, b);
+    }
+    // Safe for the same reason as in PriorityQueue: without a comparator, elements must be
+    // mutually Comparable.
+    return ((Comparable<? super E>) a).compareTo(b);
+  }
+
+  /** Returns the maximum number of elements this queue retains. */
+  public int getMaxCapacity() {
+    return maxCapacity;
+  }
+}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
new file mode 100644
index 0000000000..489606ac05
--- /dev/null
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
@@ -0,0 +1,47 @@
+package org.mobilitydata.gtfsvalidator.outputcomparator.io;
+
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
+
+/**
+ * Pairs, for one dataset and one measurement key, the {@link MemoryUsage} taken from the
+ * reference report with the one taken from the latest report. Either side may be {@code null},
+ * but not both.
+ */
+public class DatasetMemoryUsage {
+
+  private String datasetId;
+  // Derived once in the constructor and never reassigned.
+  private final String key;
+  private MemoryUsage referenceMemoryUsage;
+  private MemoryUsage latestMemoryUsage;
+
+  /**
+   * @param datasetId identifier of the dataset the measurements belong to
+   * @param referenceMemoryUsage measurement from the reference report, may be {@code null}
+   * @param latestMemoryUsage measurement from the latest report, may be {@code null}
+   * @throws NullPointerException if both measurements are {@code null}, since the key is taken
+   *     from the reference measurement or, failing that, from the latest one
+   */
+  public DatasetMemoryUsage(
+      String datasetId, MemoryUsage referenceMemoryUsage, MemoryUsage latestMemoryUsage) {
+    if (referenceMemoryUsage == null && latestMemoryUsage == null) {
+      throw new NullPointerException(
+          "referenceMemoryUsage and latestMemoryUsage must not both be null");
+    }
+    this.datasetId = datasetId;
+    this.key = referenceMemoryUsage != null ? referenceMemoryUsage.key() : latestMemoryUsage.key();
+    this.referenceMemoryUsage = referenceMemoryUsage;
+    this.latestMemoryUsage = latestMemoryUsage;
+  }
+
+  public String getDatasetId() {
+    return datasetId;
+  }
+
+  public void setDatasetId(String datasetId) {
+    this.datasetId = datasetId;
+  }
+
+  public MemoryUsage getReferenceMemoryUsage() {
+    return referenceMemoryUsage;
+  }
+
+  /** Replaces the reference measurement; the key computed at construction is left unchanged. */
+  public void setReferenceMemoryUsage(MemoryUsage referenceMemoryUsage) {
+    this.referenceMemoryUsage = referenceMemoryUsage;
+  }
+
+  public MemoryUsage getLatestMemoryUsage() {
+    return latestMemoryUsage;
+  }
+
+  /** Replaces the latest measurement; the key computed at construction is left unchanged. */
+  public void setLatestMemoryUsage(MemoryUsage latestMemoryUsage) {
+    this.latestMemoryUsage = latestMemoryUsage;
+  }
+
+  /** Returns the measurement key captured at construction time. */
+  public String getKey() {
+    return key;
+  }
+}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java
new file mode 100644
index 0000000000..1145d46b31
--- /dev/null
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java
@@ -0,0 +1,30 @@
+package org.mobilitydata.gtfsvalidator.outputcomparator.io;
+
+import java.util.Comparator;
+
+/**
+ * Orders {@link DatasetMemoryUsage} entries by their change in used memory, i.e. the latest used
+ * memory minus the reference used memory, in ascending order.
+ *
+ * <p>{@code null} entries sort before non-null ones, and entries missing either measurement (so
+ * no change can be computed) sort before entries that have both. Both operands are evaluated with
+ * the same rule so that {@code compare(a, b)} and {@code compare(b, a)} always have opposite
+ * signs, as the {@link Comparator} contract requires.
+ */
+public class MemoryUsageUsedMemoryComparator implements Comparator<DatasetMemoryUsage> {
+
+  @Override
+  public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
+    if (o1 == o2) {
+      return 0;
+    }
+    if (o1 == null || o2 == null) {
+      return o1 == null ? -1 : 1;
+    }
+    Long firstChange = usedMemoryChange(o1);
+    Long secondChange = usedMemoryChange(o2);
+    if (firstChange == null && secondChange == null) {
+      return 0;
+    }
+    if (firstChange == null || secondChange == null) {
+      return firstChange == null ? -1 : 1;
+    }
+    return Long.compare(firstChange, secondChange);
+  }
+
+  /** Returns latest minus reference used memory, or {@code null} if either side is missing. */
+  private static Long usedMemoryChange(DatasetMemoryUsage usage) {
+    if (usage.getReferenceMemoryUsage() == null || usage.getLatestMemoryUsage() == null) {
+      return null;
+    }
+    return usage.getLatestMemoryUsage().usedMemory()
+        - usage.getReferenceMemoryUsage().usedMemory();
+  }
+}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index eadd1861ee..c6746f3c4d 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -1,17 +1,26 @@
package org.mobilitydata.gtfsvalidator.outputcomparator.io;
import java.util.*;
+import java.util.stream.Collectors;
import org.mobilitydata.gtfsvalidator.model.ValidationReport;
import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.ValidationPerformance;
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
public class ValidationPerformanceCollector {
+ private static final int MEMORY_USAGE_COMPARE_MAX = 20;
private final Map referenceTimes;
private final Map latestTimes;
+ private final Map>
+ largestFirstMemoryUsageBySourceId;
+ private final Map>
+ smallestFirstMemoryMapBySourceId;
public ValidationPerformanceCollector() {
this.referenceTimes = new HashMap<>();
this.latestTimes = new HashMap<>();
+ this.largestFirstMemoryUsageBySourceId = new HashMap<>();
+ this.smallestFirstMemoryMapBySourceId = new HashMap<>();
}
public void addReferenceTime(String sourceId, Double time) {
@@ -69,6 +78,21 @@ private String formatMetrics(String metric, String datasetId, Double reference,
"| %s | %s | %.2f | %.2f | %s |\n", metric, datasetId, reference, latest, diff);
}
+  /**
+   * Formats the change between two memory figures for the report table.
+   *
+   * @param reference reference value in bytes, may be {@code null}
+   * @param latest latest value in bytes, may be {@code null}
+   * @return {@code "N/A"} when either value is missing, {@code "-"} when they are equal,
+   *     otherwise an up/down arrow followed by the signed, human readable magnitude
+   */
+  private static String getMemoryDiff(Long reference, Long latest) {
+    if (reference == null || latest == null) {
+      return "N/A";
+    }
+    long difference = latest - reference;
+    if (difference == 0) {
+      return "-";
+    }
+    String arrow = difference > 0 ? "⬆️+" : "⬇️-";
+    // Format the magnitude: convertToHumanReadableMemory renders any non-positive value as "0",
+    // which would hide the size of every decrease.
+    return String.format(
+        "%s%s", arrow, MemoryUsage.convertToHumanReadableMemory(Math.abs(difference)));
+  }
+
public String generateLogString() {
StringBuilder b = new StringBuilder();
b.append("### ⏱️ Performance Assessment\n")
@@ -176,11 +200,72 @@ public String generateLogString() {
.append(String.join(", ", warnings))
.append("\n\n");
}
+
+ if (smallestFirstMemoryMapBySourceId.size() > 0
+ || largestFirstMemoryUsageBySourceId.size() > 0) {
+ b.append("📜 Memory Consumption\n");
+ addMemoryUsageReport(smallestFirstMemoryMapBySourceId, "decreased", b);
+ addMemoryUsageReport(largestFirstMemoryUsageBySourceId, "increased", b);
+ }
+
b.append("\n\n");
return b.toString();
}
+ /**
+ * Appends one memory usage table to the report: a title line built from {@code
+ * MEMORY_USAGE_COMPARE_MAX} and {@code order}, the table header, and then the rows of every
+ * queue in {@code queueMap}, each queue's entries sorted with that queue's own comparator.
+ *
+ * <p>NOTE(review): the title format string below is split across two lines in this view, which
+ * looks like markup lost in extraction; confirm against the original file.
+ *
+ * <p>NOTE(review): the header labels the columns "(s)", but the rows written by {@code
+ * generateMemoryLogByKey} hold used-memory values; confirm the intended unit label.
+ *
+ * @param queueMap queues of entries to print
+ * @param order word inserted in the title (callers pass "decreased" or "increased")
+ * @param b builder receiving the output
+ */
+ private void addMemoryUsageReport(
+ Map> queueMap,
+ String order,
+ StringBuilder b) {
+ b.append(
+ String.format(
+ "List of %s datasets where memory has %s .
\n",
+ MEMORY_USAGE_COMPARE_MAX, order))
+ .append("\n")
+ .append(
+ "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n")
+ .append(
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n");
+
+ queueMap.keySet().stream()
+ .forEachOrdered(
+ sourceId -> {
+ var pq = queueMap.get(sourceId);
+ // A priority queue's array is not fully sorted, so copy it out and sort explicitly.
+ List datasetMemoryUsages =
+ Arrays.asList(pq.toArray(new DatasetMemoryUsage[pq.size()]));
+ Collections.sort(datasetMemoryUsages, pq.comparator());
+ generateMemoryLogByKey(datasetMemoryUsages, b);
+ });
+ }
+
+  /**
+   * Appends one table row per entry: key, dataset id, reference used memory, latest used memory
+   * and the formatted difference. A missing measurement is printed as {@code "-"}.
+   *
+   * @param memoryIncreases entries to print, in the order they should appear
+   * @param b builder receiving the rows
+   */
+  private static void generateMemoryLogByKey(
+      List<DatasetMemoryUsage> memoryIncreases, StringBuilder b) {
+    for (DatasetMemoryUsage entry : memoryIncreases) {
+      MemoryUsage reference = entry.getReferenceMemoryUsage();
+      MemoryUsage latest = entry.getLatestMemoryUsage();
+      Long referenceUsed = null;
+      if (reference != null) {
+        referenceUsed = reference.usedMemory();
+      }
+      Long latestUsed = null;
+      if (latest != null) {
+        latestUsed = latest.usedMemory();
+      }
+      String usedMemoryDiff = getMemoryDiff(referenceUsed, latestUsed);
+      Object referenceCell = referenceUsed == null ? "-" : referenceUsed;
+      Object latestCell = latestUsed == null ? "-" : latestUsed;
+      b.append(
+          String.format(
+              "| %s | %s | %s | %s | %s |\n",
+              entry.getKey(), entry.getDatasetId(), referenceCell, latestCell, usedMemoryDiff));
+    }
+  }
+
public void compareValidationReports(
String sourceId, ValidationReport referenceReport, ValidationReport latestReport) {
if (referenceReport.getValidationTimeSeconds() != null) {
@@ -189,6 +274,60 @@ public void compareValidationReports(
if (latestReport.getValidationTimeSeconds() != null) {
addLatestTime(sourceId, latestReport.getValidationTimeSeconds());
}
+
+ compareValidationReportMemoryUsage(sourceId, referenceReport, latestReport);
+ }
+
+ private void compareValidationReportMemoryUsage(
+ String sourceId, ValidationReport referenceReport, ValidationReport latestReport) {
+ Set keys =
+ referenceReport.getMemoryUsageRecords() != null
+ ? referenceReport.getMemoryUsageRecords().stream()
+ .map(MemoryUsage::key)
+ .collect(Collectors.toSet())
+ : Collections.EMPTY_SET;
+ if (latestReport.getMemoryUsageRecords() != null) {
+ keys.addAll(
+ latestReport.getMemoryUsageRecords().stream()
+ .map(MemoryUsage::key)
+ .collect(Collectors.toSet()));
+ }
+ Map referenceMap =
+ referenceReport.getMemoryUsageRecords() != null
+ ? referenceReport.getMemoryUsageRecords().stream()
+ .collect(Collectors.toMap(MemoryUsage::key, memoryUsage -> memoryUsage))
+ : new HashMap<>();
+ Map latestMap =
+ referenceReport.getMemoryUsageRecords() != null
+ ? latestReport.getMemoryUsageRecords().stream()
+ .collect(Collectors.toMap(MemoryUsage::key, memoryUsage -> memoryUsage))
+ : new HashMap<>();
+ keys.stream()
+ .forEachOrdered(
+ key -> {
+ var datasetMemoryUsage =
+ new DatasetMemoryUsage(sourceId, referenceMap.get(key), latestMap.get(key));
+ BoundedPriorityQueue decreasingQueue =
+ largestFirstMemoryUsageBySourceId.get(sourceId);
+ BoundedPriorityQueue increasingQueue =
+ smallestFirstMemoryMapBySourceId.get(sourceId);
+ if (decreasingQueue == null) {
+ decreasingQueue =
+ new BoundedPriorityQueue<>(
+ MEMORY_USAGE_COMPARE_MAX,
+ 2,
+ (new MemoryUsageUsedMemoryComparator()).reversed());
+ largestFirstMemoryUsageBySourceId.put(sourceId, decreasingQueue);
+ increasingQueue =
+ new BoundedPriorityQueue<>(
+ MEMORY_USAGE_COMPARE_MAX, 2, new MemoryUsageUsedMemoryComparator());
+ smallestFirstMemoryMapBySourceId.put(sourceId, increasingQueue);
+ }
+ if (referenceMap.containsKey(key) || latestMap.containsKey(key)) {
+ increasingQueue.offer(datasetMemoryUsage);
+ decreasingQueue.offer(datasetMemoryUsage);
+ }
+ });
}
public List toReport() {
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
index 962331fc32..f5c4971056 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
@@ -2,7 +2,11 @@
import static com.google.common.truth.Truth.assertThat;
+import java.util.Arrays;
+import java.util.Collections;
import org.junit.Test;
+import org.mobilitydata.gtfsvalidator.model.ValidationReport;
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
public class ValidationPerformanceCollectorTest {
@@ -11,13 +15,43 @@ public void generateLogString_test() {
ValidationPerformanceCollector collector = new ValidationPerformanceCollector();
// Adding some sample data
- collector.addReferenceTime("feed-id-a", 12.0);
- collector.addReferenceTime("feed-id-a", 14.0);
- collector.addLatestTime("feed-id-a", 16.0);
- collector.addLatestTime("feed-id-a", 18.0);
+ long baseMemory = 1000000;
+ // Memory usage latest null
+ collector.compareValidationReports(
+ "feed-id-a",
+ new ValidationReport(
+ Collections.EMPTY_SET,
+ 12.0,
+ Arrays.asList(
+ MemoryUsage.create("key1", baseMemory, baseMemory, 200, 50L),
+ MemoryUsage.create("key2", baseMemory, baseMemory, 200, 50L))),
+ new ValidationReport(Collections.EMPTY_SET, 16.0, Collections.EMPTY_LIST));
+ // Memory usage decreased
+ collector.compareValidationReports(
+ "feed-id-a",
+ new ValidationReport(
+ Collections.EMPTY_SET,
+ 14.0,
+ Arrays.asList(
+ MemoryUsage.create("key3", baseMemory, baseMemory - 1000, 200, 50L),
+ MemoryUsage.create("key4", baseMemory, baseMemory - 1000, 200, 50L))),
+ new ValidationReport(
+ Collections.EMPTY_SET,
+ 18.0,
+ Arrays.asList(
+ MemoryUsage.create("key3", baseMemory, baseMemory - baseMemory / 2, 200, null),
+ MemoryUsage.create("key4", baseMemory, baseMemory - baseMemory / 2, 200, null))));
- collector.addReferenceTime("feed-id-b", 20.0);
- collector.addLatestTime("feed-id-b", 22.0);
+ // Memory usage decreased
+ collector.compareValidationReports(
+ "feed-id-b",
+ new ValidationReport(
+ Collections.EMPTY_SET,
+ 20.0,
+ Arrays.asList(
+ MemoryUsage.create("key3", baseMemory, baseMemory * 2, 200, null),
+ MemoryUsage.create("key4", baseMemory, baseMemory * 2, 200, null))),
+ new ValidationReport(Collections.EMPTY_SET, 22.0, Collections.EMPTY_LIST));
// Generating the log string
String logString = collector.generateLogString();
@@ -37,6 +71,25 @@ public void generateLogString_test() {
+ "| Maximum in Reference Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n"
+ "| Minimum in Latest Reports | feed-id-a | 14.00 | 18.00 | ⬆\uFE0F+4.00 |\n"
+ "| Maximum in Latest Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n"
+ + "📜 Memory Consumption\n"
+ + "List of 20 datasets where memory has decreased .
\n\n"
+ + "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n"
+ + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ + "| key1 | feed-id-a | 0 | - | N/A |\n"
+ + "| key2 | feed-id-a | 0 | - | N/A |\n"
+ + "| key4 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n"
+ + "| key3 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n"
+ + "| key3 | feed-id-b | -1000000 | - | N/A |\n"
+ + "| key4 | feed-id-b | -1000000 | - | N/A |\n"
+ + "List of 20 datasets where memory has increased .
\n\n"
+ + "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n"
+ + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ + "| key3 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n"
+ + "| key4 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n"
+ + "| key1 | feed-id-a | 0 | - | N/A |\n"
+ + "| key2 | feed-id-a | 0 | - | N/A |\n"
+ + "| key3 | feed-id-b | -1000000 | - | N/A |\n"
+ + "| key4 | feed-id-b | -1000000 | - | N/A |\n"
+ "\n\n";
// Assert that the generated log string matches the expected log string
assertThat(logString).isEqualTo(expectedLogString);
From 848d798d956c9c464d3264df28e2f96138875900 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Tue, 1 Oct 2024 16:59:13 -0400
Subject: [PATCH 04/30] run acceptance tests with sample data
---
.github/workflows/acceptance_test.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/acceptance_test.yml b/.github/workflows/acceptance_test.yml
index 4759f4172e..ce9c650fbc 100644
--- a/.github/workflows/acceptance_test.yml
+++ b/.github/workflows/acceptance_test.yml
@@ -127,7 +127,7 @@ jobs:
- name: Set URL matrix
id: set-matrix
run: |
- DATASETS=$(python3 scripts/mobility-database-harvester/harvest_latest_versions.py -d scripts/mobility-database-harvester/datasets_metadata -l gtfs_latest_versions.json)
+ DATASETS=$(python3 scripts/mobility-database-harvester/harvest_latest_versions.py -d scripts/mobility-database-harvester/datasets_metadata -l gtfs_latest_versions.json -s)
echo $DATASETS
echo "matrix=$DATASETS" >> $GITHUB_OUTPUT
- name: Persist metadata
From e89816ced5c46f7294481997f2d4f5661b1e4a64 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Tue, 1 Oct 2024 22:01:09 -0400
Subject: [PATCH 05/30] fix memory usage serialization
---
.../performance/MemoryUsage.java | 142 +++++++++++++++---
.../performance/MemoryUsageRegister.java | 4 +-
.../io/DatasetMemoryUsage.java | 3 +-
.../io/ValidationPerformanceCollector.java | 8 +-
.../ValidationPerformanceCollectorTest.java | 16 +-
5 files changed, 133 insertions(+), 40 deletions(-)
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
index fb5982fb49..4e3b01b9b1 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
@@ -1,17 +1,26 @@
package org.mobilitydata.gtfsvalidator.performance;
-import com.google.auto.value.AutoValue;
import java.text.DecimalFormat;
-import javax.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
-@AutoValue
-public abstract class MemoryUsage {
+public class MemoryUsage {
private static final DecimalFormat TWO_DECIMAL_FORMAT = new DecimalFormat("0.00");
- public static MemoryUsage create(
- String key, long totalMemory, long freeMemory, long maxMemory, Long memoryDiff) {
- return new AutoValue_MemoryUsage(key, totalMemory, freeMemory, maxMemory, memoryDiff);
+ private String key;
+ private long totalMemory;
+ private long freeMemory;
+ private long maxMemory;
+ private Long diffMemory;
+
+ public MemoryUsage() {}
+
+ public MemoryUsage(
+ String key, long totalMemory, long freeMemory, long maxMemory, Long diffMemory) {
+ this.key = key;
+ this.totalMemory = totalMemory;
+ this.freeMemory = freeMemory;
+ this.maxMemory = maxMemory;
+ this.diffMemory = diffMemory;
}
public static String convertToHumanReadableMemory(Long size) {
@@ -36,34 +45,117 @@ public static String convertToHumanReadableMemory(Long size) {
return TWO_DECIMAL_FORMAT.format(size / 1099511627776L) + " TiB";
}
- public abstract String key();
-
- public abstract long totalMemory();
-
- public abstract long freeMemory();
-
- public abstract long maxMemory();
-
- @Nullable
- public abstract Long diffMemory();
-
public long usedMemory() {
- return totalMemory() - freeMemory();
+ return totalMemory - freeMemory;
}
public String humanReadablePrint() {
StringBuffer result = new StringBuffer();
result.append("Memory usage registered");
- if (StringUtils.isNotBlank(key())) {
- result.append(" for key: ").append(key());
+ if (StringUtils.isNotBlank(key)) {
+ result.append(" for key: ").append(key);
} else {
result.append(":");
}
- result.append(" Max: ").append(convertToHumanReadableMemory(maxMemory()));
- result.append(" Total: ").append(convertToHumanReadableMemory(totalMemory()));
- result.append(" Free: ").append(convertToHumanReadableMemory(freeMemory()));
+ result.append(" Max: ").append(convertToHumanReadableMemory(maxMemory));
+ result.append(" Total: ").append(convertToHumanReadableMemory(totalMemory));
+ result.append(" Free: ").append(convertToHumanReadableMemory(freeMemory));
result.append(" Used: ").append(convertToHumanReadableMemory(usedMemory()));
- result.append(" Diff: ").append(convertToHumanReadableMemory(diffMemory()));
+ result.append(" Diff: ").append(convertToHumanReadableMemory(diffMemory));
return result.toString();
}
+
+ public String getKey() {
+ return key;
+ }
+
+ public void setKey(String key) {
+ this.key = key;
+ }
+
+ public long getTotalMemory() {
+ return totalMemory;
+ }
+
+ public void setTotalMemory(long totalMemory) {
+ this.totalMemory = totalMemory;
+ }
+
+ public long getFreeMemory() {
+ return freeMemory;
+ }
+
+ public void setFreeMemory(long freeMemory) {
+ this.freeMemory = freeMemory;
+ }
+
+ public long getMaxMemory() {
+ return maxMemory;
+ }
+
+ public void setMaxMemory(long maxMemory) {
+ this.maxMemory = maxMemory;
+ }
+
+ public Long getDiffMemory() {
+ return diffMemory;
+ }
+
+ public void setDiffMemory(Long diffMemory) {
+ this.diffMemory = diffMemory;
+ }
+
+ @Override
+ public String toString() {
+ return "MemoryUsage{"
+ + "key="
+ + key
+ + ", "
+ + "totalMemory="
+ + totalMemory
+ + ", "
+ + "freeMemory="
+ + freeMemory
+ + ", "
+ + "maxMemory="
+ + maxMemory
+ + ", "
+ + "diffMemory="
+ + diffMemory
+ + "}";
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == this) {
+ return true;
+ }
+ if (o instanceof MemoryUsage) { // key may be null (no-arg constructor / setKey): compare null-safely
+ MemoryUsage that = (MemoryUsage) o;
+ return (this.key == null ? that.getKey() == null : this.key.equals(that.getKey()))
+ && this.totalMemory == that.getTotalMemory()
+ && this.freeMemory == that.getFreeMemory()
+ && this.maxMemory == that.getMaxMemory()
+ && (this.diffMemory == null
+ ? that.getDiffMemory() == null
+ : this.getDiffMemory().equals(that.getDiffMemory()));
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ int h$ = 1;
+ h$ *= 1000003;
+ h$ ^= (key == null) ? 0 : key.hashCode(); // key is null after the no-arg constructor
+ h$ *= 1000003;
+ h$ ^= (int) ((totalMemory >>> 32) ^ totalMemory);
+ h$ *= 1000003;
+ h$ ^= (int) ((freeMemory >>> 32) ^ freeMemory);
+ h$ *= 1000003;
+ h$ ^= (int) ((maxMemory >>> 32) ^ maxMemory);
+ h$ *= 1000003;
+ h$ ^= (diffMemory == null) ? 0 : diffMemory.hashCode();
+ return h$;
+ }
}
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java
index 38c20b8baf..e01a71948c 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java
@@ -27,9 +27,9 @@ public List getRegistry() {
public MemoryUsage getMemoryUsageSnapshot(String key, MemoryUsage previous) {
Long memoryDiff = null;
if (previous != null) {
- memoryDiff = runtime.freeMemory() - previous.freeMemory();
+ memoryDiff = runtime.freeMemory() - previous.getFreeMemory();
}
- return MemoryUsage.create(
+ return new MemoryUsage(
key, runtime.totalMemory(), runtime.freeMemory(), runtime.maxMemory(), memoryDiff);
}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
index 489606ac05..ca99d20684 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
@@ -12,7 +12,8 @@ public class DatasetMemoryUsage {
public DatasetMemoryUsage(
String datasetId, MemoryUsage referenceMemoryUsage, MemoryUsage latestMemoryUsage) {
this.datasetId = datasetId;
- this.key = referenceMemoryUsage != null ? referenceMemoryUsage.key() : latestMemoryUsage.key();
+ this.key =
+ referenceMemoryUsage != null ? referenceMemoryUsage.getKey() : latestMemoryUsage.getKey();
this.referenceMemoryUsage = referenceMemoryUsage;
this.latestMemoryUsage = latestMemoryUsage;
}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index c6746f3c4d..45f57ec6c2 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -283,24 +283,24 @@ private void compareValidationReportMemoryUsage(
Set keys =
referenceReport.getMemoryUsageRecords() != null
? referenceReport.getMemoryUsageRecords().stream()
- .map(MemoryUsage::key)
+ .map(MemoryUsage::getKey)
.collect(Collectors.toSet())
: Collections.EMPTY_SET;
if (latestReport.getMemoryUsageRecords() != null) {
keys.addAll(
latestReport.getMemoryUsageRecords().stream()
- .map(MemoryUsage::key)
+ .map(MemoryUsage::getKey)
.collect(Collectors.toSet()));
}
Map referenceMap =
referenceReport.getMemoryUsageRecords() != null
? referenceReport.getMemoryUsageRecords().stream()
- .collect(Collectors.toMap(MemoryUsage::key, memoryUsage -> memoryUsage))
+ .collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage))
: new HashMap<>();
Map latestMap =
referenceReport.getMemoryUsageRecords() != null
? latestReport.getMemoryUsageRecords().stream()
- .collect(Collectors.toMap(MemoryUsage::key, memoryUsage -> memoryUsage))
+ .collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage))
: new HashMap<>();
keys.stream()
.forEachOrdered(
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
index f5c4971056..da33a6ccc5 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
@@ -23,8 +23,8 @@ public void generateLogString_test() {
Collections.EMPTY_SET,
12.0,
Arrays.asList(
- MemoryUsage.create("key1", baseMemory, baseMemory, 200, 50L),
- MemoryUsage.create("key2", baseMemory, baseMemory, 200, 50L))),
+ new MemoryUsage("key1", baseMemory, baseMemory, 200, 50L),
+ new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))),
new ValidationReport(Collections.EMPTY_SET, 16.0, Collections.EMPTY_LIST));
// Memory usage decreased
collector.compareValidationReports(
@@ -33,14 +33,14 @@ public void generateLogString_test() {
Collections.EMPTY_SET,
14.0,
Arrays.asList(
- MemoryUsage.create("key3", baseMemory, baseMemory - 1000, 200, 50L),
- MemoryUsage.create("key4", baseMemory, baseMemory - 1000, 200, 50L))),
+ new MemoryUsage("key3", baseMemory, baseMemory - 1000, 200, 50L),
+ new MemoryUsage("key4", baseMemory, baseMemory - 1000, 200, 50L))),
new ValidationReport(
Collections.EMPTY_SET,
18.0,
Arrays.asList(
- MemoryUsage.create("key3", baseMemory, baseMemory - baseMemory / 2, 200, null),
- MemoryUsage.create("key4", baseMemory, baseMemory - baseMemory / 2, 200, null))));
+ new MemoryUsage("key3", baseMemory, baseMemory - baseMemory / 2, 200, null),
+ new MemoryUsage("key4", baseMemory, baseMemory - baseMemory / 2, 200, null))));
// Memory usage decreased
collector.compareValidationReports(
@@ -49,8 +49,8 @@ public void generateLogString_test() {
Collections.EMPTY_SET,
20.0,
Arrays.asList(
- MemoryUsage.create("key3", baseMemory, baseMemory * 2, 200, null),
- MemoryUsage.create("key4", baseMemory, baseMemory * 2, 200, null))),
+ new MemoryUsage("key3", baseMemory, baseMemory * 2, 200, null),
+ new MemoryUsage("key4", baseMemory, baseMemory * 2, 200, null))),
new ValidationReport(Collections.EMPTY_SET, 22.0, Collections.EMPTY_LIST));
// Generating the log string
From b66828fa41069a327b36f900e83dd088bb8c7fbf Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Wed, 2 Oct 2024 09:23:06 -0400
Subject: [PATCH 06/30] fix performance collector
---
.../outputcomparator/io/ValidationPerformanceCollector.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index 45f57ec6c2..eb4d4e4153 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -285,7 +285,7 @@ private void compareValidationReportMemoryUsage(
? referenceReport.getMemoryUsageRecords().stream()
.map(MemoryUsage::getKey)
.collect(Collectors.toSet())
- : Collections.EMPTY_SET;
+ : new HashSet<>();
if (latestReport.getMemoryUsageRecords() != null) {
keys.addAll(
latestReport.getMemoryUsageRecords().stream()
From 0416d1cba56324dcd9fe73f1d91d0e491c9ff493 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Wed, 2 Oct 2024 10:08:59 -0400
Subject: [PATCH 07/30] fix npe
---
.../outputcomparator/io/DatasetMemoryUsage.java | 6 ++++--
.../outputcomparator/io/ValidationPerformanceCollector.java | 2 +-
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
index ca99d20684..c837d509f9 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
@@ -12,8 +12,10 @@ public class DatasetMemoryUsage {
public DatasetMemoryUsage(
String datasetId, MemoryUsage referenceMemoryUsage, MemoryUsage latestMemoryUsage) {
this.datasetId = datasetId;
- this.key =
- referenceMemoryUsage != null ? referenceMemoryUsage.getKey() : latestMemoryUsage.getKey();
+ this.key = referenceMemoryUsage != null ? referenceMemoryUsage.getKey() : null;
+ if (key == null) {
+ this.key = latestMemoryUsage.getKey() != null ? latestMemoryUsage.getKey() : null;
+ }
this.referenceMemoryUsage = referenceMemoryUsage;
this.latestMemoryUsage = latestMemoryUsage;
}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index eb4d4e4153..74319c2cd0 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -298,7 +298,7 @@ private void compareValidationReportMemoryUsage(
.collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage))
: new HashMap<>();
Map latestMap =
- referenceReport.getMemoryUsageRecords() != null
+ latestReport.getMemoryUsageRecords() != null
? latestReport.getMemoryUsageRecords().stream()
.collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage))
: new HashMap<>();
From 0cc18335bcc200fc06cc2c844eb8057f77a6e6c1 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Wed, 2 Oct 2024 11:22:28 -0400
Subject: [PATCH 08/30] support negative memory usage for logging
---
.../gtfsvalidator/performance/MemoryUsage.java | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
index 4e3b01b9b1..f0126c23e9 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
@@ -23,26 +23,24 @@ public MemoryUsage(
this.diffMemory = diffMemory;
}
- public static String convertToHumanReadableMemory(Long size) {
- if (size == null) {
+ public static String convertToHumanReadableMemory(Long bytes) {
+ if (bytes == null) {
return "N/A";
}
- if (size <= 0) {
- return "0";
- }
+ long size = Math.abs(bytes);
if (size < 1024) {
- return size + " bytes";
+ return bytes + " bytes";
}
if (size < 1048576) {
- return TWO_DECIMAL_FORMAT.format(size / 1024.0) + " KiB";
+ return TWO_DECIMAL_FORMAT.format(Math.copySign(size / 1024.0, bytes)) + " KiB";
}
if (size < 1073741824) {
- return TWO_DECIMAL_FORMAT.format(size / 1048576.0) + " MiB";
+ return TWO_DECIMAL_FORMAT.format(Math.copySign(size / 1048576.0, bytes)) + " MiB";
}
if (size < 1099511627776L) {
- return TWO_DECIMAL_FORMAT.format(size / 1073741824.0) + " GiB";
+ return TWO_DECIMAL_FORMAT.format(Math.copySign(size / 1073741824.0, bytes)) + " GiB";
}
- return TWO_DECIMAL_FORMAT.format(size / 1099511627776L) + " TiB";
+ return TWO_DECIMAL_FORMAT.format(Math.copySign(size / 1099511627776.0, bytes)) + " TiB"; // double divisor keeps the fraction
}
public long usedMemory() {
From f6789c86ac29931c223b8a1f4a09a86e3188c63a Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Wed, 2 Oct 2024 16:42:33 -0400
Subject: [PATCH 09/30] simplify memory usage report
---
.../io/DatasetMemoryUsage.java | 46 +++--
.../io/MemoryUsageUsedMemoryComparator.java | 30 ----
.../io/UsedMemoryIncreasedComparator.java | 57 ++++++
.../io/ValidationPerformanceCollector.java | 164 +++++++-----------
.../MemoryUsageUsedMemoryComparatorTest.java | 60 +++++++
.../ValidationPerformanceCollectorTest.java | 88 ++++++----
6 files changed, 268 insertions(+), 177 deletions(-)
delete mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java
create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
create mode 100644 output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
index c837d509f9..452b7763ac 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
@@ -1,23 +1,37 @@
package org.mobilitydata.gtfsvalidator.outputcomparator.io;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
public class DatasetMemoryUsage {
private String datasetId;
- private String key;
- private MemoryUsage referenceMemoryUsage;
- private MemoryUsage latestMemoryUsage;
+ private List referenceMemoryUsage;
+ private List latestMemoryUsage;
+ private Map referenceUsedMemoryByKey = Collections.unmodifiableMap(new HashMap<>());
+ private Map latestUsedMemoryByKey = Collections.unmodifiableMap(new HashMap<>());
public DatasetMemoryUsage(
- String datasetId, MemoryUsage referenceMemoryUsage, MemoryUsage latestMemoryUsage) {
+ String datasetId,
+ List referenceMemoryUsage,
+ List latestMemoryUsage) {
this.datasetId = datasetId;
- this.key = referenceMemoryUsage != null ? referenceMemoryUsage.getKey() : null;
- if (key == null) {
- this.key = latestMemoryUsage.getKey() != null ? latestMemoryUsage.getKey() : null;
- }
this.referenceMemoryUsage = referenceMemoryUsage;
this.latestMemoryUsage = latestMemoryUsage;
+ if (referenceMemoryUsage != null) {
+ this.referenceUsedMemoryByKey =
+ referenceMemoryUsage.stream()
+ .collect(Collectors.toUnmodifiableMap(MemoryUsage::getKey, MemoryUsage::usedMemory));
+ }
+ if (latestMemoryUsage != null) {
+ this.latestUsedMemoryByKey =
+ latestMemoryUsage.stream()
+ .collect(Collectors.toUnmodifiableMap(MemoryUsage::getKey, MemoryUsage::usedMemory));
+ }
}
public String getDatasetId() {
@@ -28,23 +42,27 @@ public void setDatasetId(String datasetId) {
this.datasetId = datasetId;
}
- public MemoryUsage getReferenceMemoryUsage() {
+ public List getReferenceMemoryUsage() {
return referenceMemoryUsage;
}
- public void setReferenceMemoryUsage(MemoryUsage referenceMemoryUsage) {
+ public void setReferenceMemoryUsage(List referenceMemoryUsage) {
this.referenceMemoryUsage = referenceMemoryUsage;
}
- public MemoryUsage getLatestMemoryUsage() {
+ public List getLatestMemoryUsage() {
return latestMemoryUsage;
}
- public void setLatestMemoryUsage(MemoryUsage latestMemoryUsage) {
+ public void setLatestMemoryUsage(List latestMemoryUsage) {
this.latestMemoryUsage = latestMemoryUsage;
}
- public String getKey() {
- return key;
+ public Map getReferenceUsedMemoryByKey() {
+ return referenceUsedMemoryByKey;
+ }
+
+ public Map getLatestUsedMemoryByKey() {
+ return latestUsedMemoryByKey;
}
}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java
deleted file mode 100644
index 1145d46b31..0000000000
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package org.mobilitydata.gtfsvalidator.outputcomparator.io;
-
-import java.util.Comparator;
-
-/** A comparator for MemoryUsage objects that compares them based on the used memory. */
-public class MemoryUsageUsedMemoryComparator implements Comparator {
-
- @Override
- public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
- if (o1 == o2) {
- return 0;
- }
- if (o1 == null || o2 == null) {
- return o1 == null ? -1 : 1;
- }
- if (o1.getReferenceMemoryUsage() == null && o2.getLatestMemoryUsage() == null) {
- return 0;
- }
- if (o1.getReferenceMemoryUsage() == null || o2.getLatestMemoryUsage() == null) {
- return o1.getReferenceMemoryUsage() == null ? -1 : 1;
- }
- if (o1.getReferenceMemoryUsage().usedMemory() < o2.getLatestMemoryUsage().usedMemory()) {
- return -1;
- }
- if (o1.getReferenceMemoryUsage().usedMemory() > o2.getLatestMemoryUsage().usedMemory()) {
- return 1;
- }
- return 0;
- }
-}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
new file mode 100644
index 0000000000..eb3c319df6
--- /dev/null
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
@@ -0,0 +1,57 @@
+package org.mobilitydata.gtfsvalidator.outputcomparator.io;
+
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Orders {@link DatasetMemoryUsage} objects by the smallest per-key value of (reference used
+ * memory - latest used memory). Only keys present in both the reference and the latest records,
+ * and whose used memory actually changed, take part; any other key is ignored. The most negative
+ * difference (largest increase in the latest run) sorts first; a dataset with no comparable key
+ * sorts last ({@code Long.MAX_VALUE}). NOTE(review): the null guards read o1's reference list but
+ * o2's latest list, so compare(a, b) and compare(b, a) may disagree -- confirm this is intended.
+ */
+public class UsedMemoryIncreasedComparator implements Comparator {
+
+ @Override
+ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
+ if (o1 == o2) {
+ return 0;
+ }
+ if (o1 == null || o2 == null) {
+ return o1 == null ? -1 : 1;
+ }
+ if (o1.getReferenceMemoryUsage() == null && o2.getLatestMemoryUsage() == null) {
+ return 0;
+ }
+ if (o1.getReferenceMemoryUsage() == null || o2.getLatestMemoryUsage() == null) {
+ return o1.getReferenceMemoryUsage() == null ? -1 : 1;
+ }
+
+ long o1MinDiff =
+ getMinimumDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey());
+ long o2MinDiff =
+ getMinimumDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey());
+ return Long.compare(o1MinDiff, o2MinDiff);
+ }
+
+ private long getMinimumDifferenceByKey(
+ Map referenceMemoryUsage, Map latestMemoryUsage) {
+ Set keys = new HashSet<>();
+ keys.addAll(latestMemoryUsage.keySet());
+ keys.addAll(referenceMemoryUsage.keySet());
+ return keys.stream()
+ .filter(key -> latestMemoryUsage.containsKey(key) && referenceMemoryUsage.containsKey(key))
+ .filter(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key) != 0)
+ .mapToLong(key -> referenceMemoryUsage.get(key) - latestMemoryUsage.get(key))
+ .min()
+ .orElse(Long.MAX_VALUE);
+ }
+
+ @Override
+ public Comparator reversed() {
+ return Comparator.super.reversed();
+ }
+}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index 74319c2cd0..2bf27795c4 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -1,26 +1,32 @@
package org.mobilitydata.gtfsvalidator.outputcomparator.io;
import java.util.*;
-import java.util.stream.Collectors;
+import java.util.concurrent.atomic.AtomicBoolean;
import org.mobilitydata.gtfsvalidator.model.ValidationReport;
import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.ValidationPerformance;
import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
public class ValidationPerformanceCollector {
- private static final int MEMORY_USAGE_COMPARE_MAX = 20;
+ public static final int MEMORY_USAGE_COMPARE_MAX = 25;
private final Map referenceTimes;
private final Map latestTimes;
- private final Map>
- largestFirstMemoryUsageBySourceId;
- private final Map>
- smallestFirstMemoryMapBySourceId;
+ private final BoundedPriorityQueue datasetsDecreasedMemoryUsage;
+ private final BoundedPriorityQueue datasetsIncreasedMemoryUsage;
public ValidationPerformanceCollector() {
this.referenceTimes = new HashMap<>();
this.latestTimes = new HashMap<>();
- this.largestFirstMemoryUsageBySourceId = new HashMap<>();
- this.smallestFirstMemoryMapBySourceId = new HashMap<>();
+ this.datasetsDecreasedMemoryUsage =
+ new BoundedPriorityQueue<>(
+ MEMORY_USAGE_COMPARE_MAX,
+ MEMORY_USAGE_COMPARE_MAX,
+ (new UsedMemoryIncreasedComparator().reversed()));
+ this.datasetsIncreasedMemoryUsage =
+ new BoundedPriorityQueue<>(
+ MEMORY_USAGE_COMPARE_MAX,
+ MEMORY_USAGE_COMPARE_MAX,
+ new UsedMemoryIncreasedComparator());
}
public void addReferenceTime(String sourceId, Double time) {
@@ -201,68 +207,60 @@ public String generateLogString() {
.append("\n\n");
}
- if (smallestFirstMemoryMapBySourceId.size() > 0
- || largestFirstMemoryUsageBySourceId.size() > 0) {
- b.append("📜 Memory Consumption\n");
- addMemoryUsageReport(smallestFirstMemoryMapBySourceId, "decreased", b);
- addMemoryUsageReport(largestFirstMemoryUsageBySourceId, "increased", b);
- }
-
b.append("\n\n");
+ if (datasetsIncreasedMemoryUsage.size() > 0 || datasetsDecreasedMemoryUsage.size() > 0) {
+ b.append("\n");
+ b.append("📜 Memory Consumption
\n");
+ addMemoryUsageReport(datasetsIncreasedMemoryUsage, "increased", b);
+ addMemoryUsageReport(datasetsDecreasedMemoryUsage, "decreased", b);
+ b.append(" \n");
+ }
return b.toString();
}
private void addMemoryUsageReport(
- Map> queueMap,
- String order,
- StringBuilder b) {
+ BoundedPriorityQueue queue, String order, StringBuilder b) {
b.append(
String.format(
- "List of %s datasets where memory has %s .
\n",
- MEMORY_USAGE_COMPARE_MAX, order))
+ "List of %s datasets where memory has %s.
", MEMORY_USAGE_COMPARE_MAX, order))
.append("\n")
.append(
- "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n")
+ "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n")
.append(
"|-----------------------------|-------------------|----------------|----------------|----------------|\n");
-
- queueMap.keySet().stream()
+ List sortedList = new ArrayList<>(queue);
+ sortedList.sort(queue.comparator());
+ sortedList.stream()
.forEachOrdered(
- sourceId -> {
- var pq = queueMap.get(sourceId);
- List datasetMemoryUsages =
- Arrays.asList(pq.toArray(new DatasetMemoryUsage[pq.size()]));
- Collections.sort(datasetMemoryUsages, pq.comparator());
- generateMemoryLogByKey(datasetMemoryUsages, b);
+ datasetMemoryUsage -> {
+ generateMemoryLogByKey(datasetMemoryUsage, b);
});
}
private static void generateMemoryLogByKey(
- List memoryIncreases, StringBuilder b) {
- memoryIncreases.stream()
- .forEachOrdered(
- item -> {
- String usedMemoryDiff =
- getMemoryDiff(
- item.getReferenceMemoryUsage() != null
- ? item.getReferenceMemoryUsage().usedMemory()
- : null,
- item.getLatestMemoryUsage() != null
- ? item.getLatestMemoryUsage().usedMemory()
- : null);
- b.append(
- String.format(
- "| %s | %s | %s | %s | %s |\n",
- item.getKey(),
- item.getDatasetId(),
- item.getReferenceMemoryUsage() != null
- ? item.getReferenceMemoryUsage().usedMemory()
- : "-",
- item.getLatestMemoryUsage() != null
- ? item.getLatestMemoryUsage().usedMemory()
- : "-",
- usedMemoryDiff));
+ DatasetMemoryUsage datasetMemoryUsage, StringBuilder b) {
+ AtomicBoolean isFirst = new AtomicBoolean(true);
+ Set keys = new HashSet<>();
+ keys.addAll(datasetMemoryUsage.getReferenceUsedMemoryByKey().keySet());
+ keys.addAll(datasetMemoryUsage.getLatestUsedMemoryByKey().keySet());
+ keys.stream()
+ .forEach(
+ key -> {
+ var reference = datasetMemoryUsage.getReferenceUsedMemoryByKey().get(key);
+ var latest = datasetMemoryUsage.getLatestUsedMemoryByKey().get(key);
+ if (reference != null && latest != null) {
+ String usedMemoryDiff = getMemoryDiff(reference, latest);
+ if (isFirst.get()) {
+ b.append(
+ String.format("| %s | | | | |\n", datasetMemoryUsage.getDatasetId()));
+ isFirst.set(false);
+ }
+ b.append(
+ String.format(
+ "| | %s | %s | %s | %s |\n", key, reference, latest, usedMemoryDiff));
+ isFirst.set(false);
+ }
});
}
@@ -280,54 +278,20 @@ public void compareValidationReports(
private void compareValidationReportMemoryUsage(
String sourceId, ValidationReport referenceReport, ValidationReport latestReport) {
- Set keys =
- referenceReport.getMemoryUsageRecords() != null
- ? referenceReport.getMemoryUsageRecords().stream()
- .map(MemoryUsage::getKey)
- .collect(Collectors.toSet())
- : new HashSet<>();
- if (latestReport.getMemoryUsageRecords() != null) {
- keys.addAll(
- latestReport.getMemoryUsageRecords().stream()
- .map(MemoryUsage::getKey)
- .collect(Collectors.toSet()));
+ DatasetMemoryUsage datasetMemoryUsage =
+ new DatasetMemoryUsage(
+ sourceId,
+ referenceReport.getMemoryUsageRecords(),
+ latestReport.getMemoryUsageRecords());
+ if (referenceReport.getMemoryUsageRecords() != null
+ && referenceReport.getMemoryUsageRecords().size() > 0
+ && latestReport.getMemoryUsageRecords() != null
+ && latestReport.getMemoryUsageRecords().size() > 0) {
+ datasetsIncreasedMemoryUsage.offer(datasetMemoryUsage);
+ datasetsDecreasedMemoryUsage.offer(datasetMemoryUsage);
+ } else {
+ // add to not found keys
}
- Map referenceMap =
- referenceReport.getMemoryUsageRecords() != null
- ? referenceReport.getMemoryUsageRecords().stream()
- .collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage))
- : new HashMap<>();
- Map latestMap =
- latestReport.getMemoryUsageRecords() != null
- ? latestReport.getMemoryUsageRecords().stream()
- .collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage))
- : new HashMap<>();
- keys.stream()
- .forEachOrdered(
- key -> {
- var datasetMemoryUsage =
- new DatasetMemoryUsage(sourceId, referenceMap.get(key), latestMap.get(key));
- BoundedPriorityQueue decreasingQueue =
- largestFirstMemoryUsageBySourceId.get(sourceId);
- BoundedPriorityQueue increasingQueue =
- smallestFirstMemoryMapBySourceId.get(sourceId);
- if (decreasingQueue == null) {
- decreasingQueue =
- new BoundedPriorityQueue<>(
- MEMORY_USAGE_COMPARE_MAX,
- 2,
- (new MemoryUsageUsedMemoryComparator()).reversed());
- largestFirstMemoryUsageBySourceId.put(sourceId, decreasingQueue);
- increasingQueue =
- new BoundedPriorityQueue<>(
- MEMORY_USAGE_COMPARE_MAX, 2, new MemoryUsageUsedMemoryComparator());
- smallestFirstMemoryMapBySourceId.put(sourceId, increasingQueue);
- }
- if (referenceMap.containsKey(key) || latestMap.containsKey(key)) {
- increasingQueue.offer(datasetMemoryUsage);
- decreasingQueue.offer(datasetMemoryUsage);
- }
- });
}
public List toReport() {
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java
new file mode 100644
index 0000000000..b09e55387d
--- /dev/null
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java
@@ -0,0 +1,60 @@
+package org.mobilitydata.gtfsvalidator.outputcomparator.io;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.*;
+import org.junit.Before;
+import org.junit.Test;
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
+
+public class MemoryUsageUsedMemoryComparatorTest {
+
+ private UsedMemoryIncreasedComparator comparator;
+
+ @Before
+ public void setUp() {
+ comparator = new UsedMemoryIncreasedComparator();
+ }
+
+ @Test
+ public void testCompare_equalMemoryUsage() {
+ List referenceMemoryUsage = getMemoryUsage(100L);
+ List latestMemoryUsage = getMemoryUsage(100L);
+ DatasetMemoryUsage o1 =
+ new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage);
+ DatasetMemoryUsage o2 =
+ new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage);
+ assertEquals(0, comparator.compare(o1, o2));
+ }
+
+ @Test
+ public void testCompare_firstHasMoreMemoryDifference() {
+ List referenceMemoryUsage = getMemoryUsage(100L);
+ List latestMemoryUsage = getMemoryUsage(50L);
+ DatasetMemoryUsage o1 =
+ new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage);
+ DatasetMemoryUsage o2 =
+ new DatasetMemoryUsage("dataset1", referenceMemoryUsage, getMemoryUsage(100L));
+ assertEquals(-1, comparator.compare(o1, o2));
+ }
+
+ @Test
+ public void testCompare_firstHasLessMemoryDifference() {
+ List referenceMemoryUsage = getMemoryUsage(100L);
+ List latestMemoryUsage = getMemoryUsage(50L);
+ DatasetMemoryUsage o1 =
+ new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage);
+ DatasetMemoryUsage o2 =
+ new DatasetMemoryUsage("dataset1", referenceMemoryUsage, getMemoryUsage(10L));
+ assertEquals(1, comparator.compare(o1, o2));
+ }
+
+ private static List getMemoryUsage(long freeMemory) {
+ MemoryUsage[] referenceMemoryUsage =
+ new MemoryUsage[] {
+ new MemoryUsage("key1", 100L, freeMemory, 100L, 100L),
+ new MemoryUsage("key2", 100L, freeMemory, 100L, 100L),
+ };
+ return Arrays.asList(referenceMemoryUsage);
+ }
+}
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
index da33a6ccc5..6fc23cb62c 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
@@ -14,45 +14,58 @@ public class ValidationPerformanceCollectorTest {
public void generateLogString_test() {
ValidationPerformanceCollector collector = new ValidationPerformanceCollector();
+ // Adding some sample data
+ collector.addReferenceTime("feed-id-a", 12.0);
+ collector.addReferenceTime("feed-id-a", 14.0);
+ collector.addLatestTime("feed-id-a", 16.0);
+ collector.addLatestTime("feed-id-a", 18.0);
+
+ collector.addReferenceTime("feed-id-b", 20.0);
+ collector.addLatestTime("feed-id-b", 22.0);
+
// Adding some sample data
long baseMemory = 1000000;
// Memory usage latest null
collector.compareValidationReports(
- "feed-id-a",
+ "feed-id-m1",
new ValidationReport(
Collections.EMPTY_SET,
- 12.0,
+ null,
Arrays.asList(
new MemoryUsage("key1", baseMemory, baseMemory, 200, 50L),
new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))),
new ValidationReport(Collections.EMPTY_SET, 16.0, Collections.EMPTY_LIST));
- // Memory usage decreased
+ // Memory usage increased as there is less free memory
collector.compareValidationReports(
- "feed-id-a",
+ "feed-id-m2",
new ValidationReport(
Collections.EMPTY_SET,
- 14.0,
+ null,
Arrays.asList(
- new MemoryUsage("key3", baseMemory, baseMemory - 1000, 200, 50L),
- new MemoryUsage("key4", baseMemory, baseMemory - 1000, 200, 50L))),
+ new MemoryUsage("key1", baseMemory, baseMemory, 200, 50L),
+ new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))),
new ValidationReport(
Collections.EMPTY_SET,
- 18.0,
+ null,
Arrays.asList(
- new MemoryUsage("key3", baseMemory, baseMemory - baseMemory / 2, 200, null),
- new MemoryUsage("key4", baseMemory, baseMemory - baseMemory / 2, 200, null))));
+ new MemoryUsage("key1", baseMemory, baseMemory - baseMemory / 2, 200, null),
+ new MemoryUsage("key2", baseMemory, baseMemory - baseMemory / 2, 200, null))));
- // Memory usage decreased
+ // Memory usage decreased as there is more free memory
collector.compareValidationReports(
- "feed-id-b",
+ "feed-id-m3",
new ValidationReport(
Collections.EMPTY_SET,
- 20.0,
+ null,
+ Arrays.asList(
+ new MemoryUsage("key3", baseMemory, baseMemory + 100, 200, null),
+ new MemoryUsage("key4", baseMemory, baseMemory + 100, 200, null))),
+ new ValidationReport(
+ Collections.EMPTY_SET,
+ null,
Arrays.asList(
new MemoryUsage("key3", baseMemory, baseMemory * 2, 200, null),
- new MemoryUsage("key4", baseMemory, baseMemory * 2, 200, null))),
- new ValidationReport(Collections.EMPTY_SET, 22.0, Collections.EMPTY_LIST));
-
+ new MemoryUsage("key4", baseMemory, baseMemory * 2, 200, null))));
// Generating the log string
String logString = collector.generateLogString();
String expectedLogString =
@@ -71,26 +84,35 @@ public void generateLogString_test() {
+ "| Maximum in Reference Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n"
+ "| Minimum in Latest Reports | feed-id-a | 14.00 | 18.00 | ⬆\uFE0F+4.00 |\n"
+ "| Maximum in Latest Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n"
+ + "#### ⚠️ Warnings\n\n"
+ + "The following dataset IDs are missing validation times either in reference or latest:\n"
+ + "feed-id-m1\n\n"
+ + "\n\n"
+ + "\n"
+ "📜 Memory Consumption
\n"
- + "List of 20 datasets where memory has decreased .
\n\n"
- + "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n"
+ + "List of "
+ + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
+ + " datasets where memory has increased.
\n"
+ + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
- + "| key1 | feed-id-a | 0 | - | N/A |\n"
- + "| key2 | feed-id-a | 0 | - | N/A |\n"
- + "| key4 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n"
- + "| key3 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n"
- + "| key3 | feed-id-b | -1000000 | - | N/A |\n"
- + "| key4 | feed-id-b | -1000000 | - | N/A |\n"
- + "List of 20 datasets where memory has increased .
\n\n"
- + "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n"
+ + "| feed-id-m2 | | | | |\n"
+ + "| | key1 | 0 | 500000 | ⬆\uFE0F+488.28 KiB |\n"
+ + "| | key2 | 0 | 500000 | ⬆\uFE0F+488.28 KiB |\n"
+ + "| feed-id-m3 | | | | |\n"
+ + "| | key3 | -100 | -1000000 | ⬇\uFE0F-976.46 KiB |\n"
+ + "| | key4 | -100 | -1000000 | ⬇\uFE0F-976.46 KiB |\n"
+ + "List of "
+ + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
+ + " datasets where memory has decreased.
\n"
+ + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
- + "| key3 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n"
- + "| key4 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n"
- + "| key1 | feed-id-a | 0 | - | N/A |\n"
- + "| key2 | feed-id-a | 0 | - | N/A |\n"
- + "| key3 | feed-id-b | -1000000 | - | N/A |\n"
- + "| key4 | feed-id-b | -1000000 | - | N/A |\n"
- + " \n\n";
+ + "| feed-id-m3 | | | | |\n"
+ + "| | key3 | -100 | -1000000 | ⬇️-976.46 KiB |\n"
+ + "| | key4 | -100 | -1000000 | ⬇️-976.46 KiB |\n"
+ + "| feed-id-m2 | | | | |\n"
+ + "| | key1 | 0 | 500000 | ⬆️+488.28 KiB |\n"
+ + "| | key2 | 0 | 500000 | ⬆️+488.28 KiB |\n"
+ + "\n";
// Assert that the generated log string matches the expected log string
assertThat(logString).isEqualTo(expectedLogString);
}
From 3efd975f174db0993bf0ec6c5b4bbf76c47487c2 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Wed, 2 Oct 2024 16:57:23 -0400
Subject: [PATCH 10/30] fix compilation issue
---
.../org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java
index af46959357..0fcaa738da 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java
@@ -175,7 +175,7 @@ private static void loadTables(
NoticeContainer noticeContainer,
ExecutorService exec,
List> loaderCallables,
- ArrayList> tableContainers)
+ ArrayList> tableContainers)
throws InterruptedException {
for (Future futureContainer : exec.invokeAll(loaderCallables)) {
try {
From 39182dd85f3c080d4d1fa0d46d80391d2a544dd2 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Thu, 3 Oct 2024 10:16:00 -0400
Subject: [PATCH 11/30] add feeds with no reference
---
.../io/UsedMemoryIncreasedComparator.java | 1 -
.../io/ValidationPerformanceCollector.java | 62 +++++++++++++------
.../ValidationPerformanceCollectorTest.java | 30 +++++----
3 files changed, 61 insertions(+), 32 deletions(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
index eb3c319df6..9593d89b14 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
@@ -29,7 +29,6 @@ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
if (o1.getReferenceMemoryUsage() == null || o2.getLatestMemoryUsage() == null) {
return o1.getReferenceMemoryUsage() == null ? -1 : 1;
}
-
long o1MinDiff =
getMinimumDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey());
long o2MinDiff =
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index 2bf27795c4..752cfde317 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -13,6 +13,7 @@ public class ValidationPerformanceCollector {
private final Map latestTimes;
private final BoundedPriorityQueue datasetsDecreasedMemoryUsage;
private final BoundedPriorityQueue datasetsIncreasedMemoryUsage;
+ private final List datasetsMemoryUsageNoReference;
public ValidationPerformanceCollector() {
this.referenceTimes = new HashMap<>();
@@ -27,6 +28,7 @@ public ValidationPerformanceCollector() {
MEMORY_USAGE_COMPARE_MAX,
MEMORY_USAGE_COMPARE_MAX,
new UsedMemoryIncreasedComparator());
+ this.datasetsMemoryUsageNoReference = new ArrayList<>();
}
public void addReferenceTime(String sourceId, Double time) {
@@ -212,26 +214,42 @@ public String generateLogString() {
if (datasetsIncreasedMemoryUsage.size() > 0 || datasetsDecreasedMemoryUsage.size() > 0) {
b.append("\n");
b.append("📜 Memory Consumption
\n");
- addMemoryUsageReport(datasetsIncreasedMemoryUsage, "increased", b);
- addMemoryUsageReport(datasetsDecreasedMemoryUsage, "decreased", b);
+
+ List increasedMemoryUsages =
+ getDatasetMemoryUsages(datasetsIncreasedMemoryUsage);
+ addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b);
+ List decreasedMemoryUsages =
+ getDatasetMemoryUsages(datasetsDecreasedMemoryUsage);
+ addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b);
+ datasetsMemoryUsageNoReference.sort(
+ new Comparator() {
+ @Override
+ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
+ return 0;
+ }
+ });
+ addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b);
b.append(" \n");
}
return b.toString();
}
+ private List getDatasetMemoryUsages(
+ BoundedPriorityQueue datasetsMemoryUsage) {
+ List increasedMemoryUsages = new ArrayList<>(datasetsMemoryUsage);
+ increasedMemoryUsages.sort(datasetsMemoryUsage.comparator());
+ return increasedMemoryUsages;
+ }
+
private void addMemoryUsageReport(
- BoundedPriorityQueue queue, String order, StringBuilder b) {
- b.append(
- String.format(
- "List of %s datasets where memory has %s.
", MEMORY_USAGE_COMPARE_MAX, order))
+ List memoryUsages, String order, StringBuilder b) {
+ b.append(String.format("List of %s datasets(%s).
", MEMORY_USAGE_COMPARE_MAX, order))
.append("\n")
.append(
"| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n")
.append(
"|-----------------------------|-------------------|----------------|----------------|----------------|\n");
- List sortedList = new ArrayList<>(queue);
- sortedList.sort(queue.comparator());
- sortedList.stream()
+ memoryUsages.stream()
.forEachOrdered(
datasetMemoryUsage -> {
generateMemoryLogByKey(datasetMemoryUsage, b);
@@ -249,18 +267,22 @@ private static void generateMemoryLogByKey(
key -> {
var reference = datasetMemoryUsage.getReferenceUsedMemoryByKey().get(key);
var latest = datasetMemoryUsage.getLatestUsedMemoryByKey().get(key);
- if (reference != null && latest != null) {
- String usedMemoryDiff = getMemoryDiff(reference, latest);
- if (isFirst.get()) {
- b.append(
- String.format("| %s | | | | |\n", datasetMemoryUsage.getDatasetId()));
- isFirst.set(false);
- }
- b.append(
- String.format(
- "| | %s | %s | %s | %s |\n", key, reference, latest, usedMemoryDiff));
+ if (isFirst.get()) {
+ b.append(String.format("| %s | | | | |\n", datasetMemoryUsage.getDatasetId()));
isFirst.set(false);
}
+ // if (reference != null && latest != null) {
+ String usedMemoryDiff = getMemoryDiff(reference, latest);
+ b.append(
+ String.format(
+ "| | %s | %s | %s | %s |\n",
+ key,
+ reference != null
+ ? MemoryUsage.convertToHumanReadableMemory(reference)
+ : "N/A",
+ latest != null ? MemoryUsage.convertToHumanReadableMemory(latest) : "N/A",
+ usedMemoryDiff));
+ // }
});
}
@@ -290,7 +312,7 @@ private void compareValidationReportMemoryUsage(
datasetsIncreasedMemoryUsage.offer(datasetMemoryUsage);
datasetsDecreasedMemoryUsage.offer(datasetMemoryUsage);
} else {
- // add to not found keys
+ datasetsMemoryUsageNoReference.add(datasetMemoryUsage);
}
}
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
index 6fc23cb62c..88803994fd 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
@@ -32,7 +32,7 @@ public void generateLogString_test() {
Collections.EMPTY_SET,
null,
Arrays.asList(
- new MemoryUsage("key1", baseMemory, baseMemory, 200, 50L),
+ new MemoryUsage("key1", baseMemory, baseMemory + baseMemory * 10, 200, 50L),
new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))),
new ValidationReport(Collections.EMPTY_SET, 16.0, Collections.EMPTY_LIST));
// Memory usage increased as there is less free memory
@@ -92,26 +92,34 @@ public void generateLogString_test() {
+ "📜 Memory Consumption\n"
+ "List of "
+ ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
- + " datasets where memory has increased.
\n"
+ + " datasets(memory has increased).
\n"
+ "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ "| feed-id-m2 | | | | |\n"
- + "| | key1 | 0 | 500000 | ⬆\uFE0F+488.28 KiB |\n"
- + "| | key2 | 0 | 500000 | ⬆\uFE0F+488.28 KiB |\n"
+ + "| | key1 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ + "| | key2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ "| feed-id-m3 | | | | |\n"
- + "| | key3 | -100 | -1000000 | ⬇\uFE0F-976.46 KiB |\n"
- + "| | key4 | -100 | -1000000 | ⬇\uFE0F-976.46 KiB |\n"
+ + "| | key3 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n"
+ + "| | key4 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n"
+ "List of "
+ ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
- + " datasets where memory has decreased.
\n"
+ + " datasets(memory has decreased).\n"
+ "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ "| feed-id-m3 | | | | |\n"
- + "| | key3 | -100 | -1000000 | ⬇️-976.46 KiB |\n"
- + "| | key4 | -100 | -1000000 | ⬇️-976.46 KiB |\n"
+ + "| | key3 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n"
+ + "| | key4 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n"
+ "| feed-id-m2 | | | | |\n"
- + "| | key1 | 0 | 500000 | ⬆️+488.28 KiB |\n"
- + "| | key2 | 0 | 500000 | ⬆️+488.28 KiB |\n"
+ + "| | key1 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ + "| | key2 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ + "List of "
+ + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
+ + " datasets(no reference available).
\n"
+ + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n"
+ + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ + "| feed-id-m1 | | | | |\n"
+ + "| | key1 | -9.54 MiB | N/A | N/A |\n"
+ + "| | key2 | 0 bytes | N/A | N/A |\n"
+ "\n";
// Assert that the generated log string matches the expected log string
assertThat(logString).isEqualTo(expectedLogString);
From 06fe749fda091729ea929a52327261c7aadbfaed Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Thu, 3 Oct 2024 10:58:29 -0400
Subject: [PATCH 12/30] add no references to the report
---
.../io/ValidationPerformanceCollector.java | 20 ++++++++-----------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index 752cfde317..79f8d9effb 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -211,26 +211,22 @@ public String generateLogString() {
b.append("\n\n");
- if (datasetsIncreasedMemoryUsage.size() > 0 || datasetsDecreasedMemoryUsage.size() > 0) {
- b.append("\n");
- b.append("📜 Memory Consumption
\n");
-
+ b.append("\n");
+ b.append("📜 Memory Consumption
\n");
+ if (datasetsIncreasedMemoryUsage.size() > 0) {
List increasedMemoryUsages =
getDatasetMemoryUsages(datasetsIncreasedMemoryUsage);
addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b);
+ }
+ if (datasetsDecreasedMemoryUsage.size() > 0) {
List decreasedMemoryUsages =
getDatasetMemoryUsages(datasetsDecreasedMemoryUsage);
addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b);
- datasetsMemoryUsageNoReference.sort(
- new Comparator() {
- @Override
- public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
- return 0;
- }
- });
+ }
+ if (datasetsMemoryUsageNoReference.size() > 0) {
addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b);
- b.append(" \n");
}
+ b.append(" \n");
return b.toString();
}
From a70accf1af59b92a419b6797e8f4c62dfe53136e Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Thu, 3 Oct 2024 11:17:21 -0400
Subject: [PATCH 13/30] fix failing tests
---
.../io/ValidationPerformanceCollector.java | 34 +++++++++++--------
.../cli/ValidationReportComparatorTest.java | 8 ++++-
2 files changed, 26 insertions(+), 16 deletions(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index 79f8d9effb..ad7edd1f52 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -211,22 +211,26 @@ public String generateLogString() {
b.append("\n\n");
- b.append("\n");
- b.append("📜 Memory Consumption
\n");
- if (datasetsIncreasedMemoryUsage.size() > 0) {
- List increasedMemoryUsages =
- getDatasetMemoryUsages(datasetsIncreasedMemoryUsage);
- addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b);
- }
- if (datasetsDecreasedMemoryUsage.size() > 0) {
- List decreasedMemoryUsages =
- getDatasetMemoryUsages(datasetsDecreasedMemoryUsage);
- addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b);
- }
- if (datasetsMemoryUsageNoReference.size() > 0) {
- addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b);
+ if (datasetsIncreasedMemoryUsage.size() > 0
+ || datasetsDecreasedMemoryUsage.size() > 0
+ || datasetsMemoryUsageNoReference.size() > 0) {
+ b.append("\n");
+ b.append("📜 Memory Consumption
\n");
+ if (datasetsIncreasedMemoryUsage.size() > 0) {
+ List increasedMemoryUsages =
+ getDatasetMemoryUsages(datasetsIncreasedMemoryUsage);
+ addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b);
+ }
+ if (datasetsDecreasedMemoryUsage.size() > 0) {
+ List decreasedMemoryUsages =
+ getDatasetMemoryUsages(datasetsDecreasedMemoryUsage);
+ addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b);
+ }
+ if (datasetsMemoryUsageNoReference.size() > 0) {
+ addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b);
+ }
+ b.append(" \n");
}
- b.append(" \n");
return b.toString();
}
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
index 18e8d66e56..c1a37f86cc 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
@@ -141,7 +141,13 @@ public void addedErrorNotice_summaryString() throws Exception {
+ "\n"
+ "| Time Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
- + "\n\n\n");
+ + "\n\n"
+ + "\n"
+ + "📜 Memory Consumption
\n"
+ + "List of 25 datasets(no reference available).
\n"
+ + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n"
+ + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ + " \n\n");
}
@Test
From b10320dca505b5fe5bfbc632d8bb4749e4c1336a Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Thu, 3 Oct 2024 14:40:53 -0400
Subject: [PATCH 14/30] fix memory table formatting
---
.../performance/MemoryMonitor.java | 4 ++
.../performance/MemoryMonitorAspect.java | 8 +++-
.../performance/MemoryUsage.java | 17 +++++++
.../io/ValidationPerformanceCollector.java | 48 ++++++++++++-------
.../cli/ValidationReportComparatorTest.java | 6 +--
.../ValidationPerformanceCollectorTest.java | 20 ++++----
6 files changed, 73 insertions(+), 30 deletions(-)
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java
index 179ce3cd78..cd05c2a509 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java
@@ -5,6 +5,10 @@
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
+/**
+ * Annotation to monitor memory usage of a method. The annotated method should return a {@link
+ * MemoryUsage} object. The key is used to group memory usage of different methods.
+ */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface MemoryMonitor {
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java
index f6baf95945..3c2fdef403 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java
@@ -6,10 +6,10 @@
import org.aspectj.lang.annotation.Aspect;
import org.aspectj.lang.reflect.MethodSignature;
+/** Aspect to monitor memory usage of a method. */
@Aspect
public class MemoryMonitorAspect {
- // @Around("@annotation(MemoryMonitor)")
@Around("execution(@org.mobilitydata.gtfsvalidator.performance.MemoryMonitor * *(..))")
public Object monitorMemoryUsage(ProceedingJoinPoint joinPoint) throws Throwable {
String key = extractKey(joinPoint);
@@ -23,6 +23,12 @@ public Object monitorMemoryUsage(ProceedingJoinPoint joinPoint) throws Throwable
}
}
+ /**
+ * Extracts the key from the method signature or the annotation.
+ *
+ * @param joinPoint the join point
+ * @return the key either from the annotation or the method signature.
+ */
private String extractKey(ProceedingJoinPoint joinPoint) {
var method = ((MethodSignature) joinPoint.getSignature()).getMethod();
var memoryMonitor = method.getAnnotation(MemoryMonitor.class);
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
index f0126c23e9..f81321afb3 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java
@@ -3,6 +3,7 @@
import java.text.DecimalFormat;
import org.apache.commons.lang3.StringUtils;
+/** Represents memory usage information. */
public class MemoryUsage {
private static final DecimalFormat TWO_DECIMAL_FORMAT = new DecimalFormat("0.00");
@@ -23,6 +24,12 @@ public MemoryUsage(
this.diffMemory = diffMemory;
}
+ /**
+ * Converts bytes to human-readable memory.
+ *
+ * @param bytes
+ * @return human-readable memory, e.g., "1.23 GiB"
+ */
public static String convertToHumanReadableMemory(Long bytes) {
if (bytes == null) {
return "N/A";
@@ -43,10 +50,20 @@ public static String convertToHumanReadableMemory(Long bytes) {
return TWO_DECIMAL_FORMAT.format(Math.copySign(size / 1099511627776L, bytes)) + " TiB";
}
+ /**
+ * The memory used is computed as the difference between the total memory and the free memory.
+ *
+ * @return the memory used.
+ */
public long usedMemory() {
return totalMemory - freeMemory;
}
+ /**
+ * Returns a human-readable string representation of the memory usage.
+ *
+ * @return a human-readable string representation of the memory usage.
+ */
public String humanReadablePrint() {
StringBuffer result = new StringBuffer();
result.append("Memory usage registered");
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index ad7edd1f52..746d8f53d6 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -219,15 +219,15 @@ public String generateLogString() {
if (datasetsIncreasedMemoryUsage.size() > 0) {
List increasedMemoryUsages =
getDatasetMemoryUsages(datasetsIncreasedMemoryUsage);
- addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b);
+ addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b, true);
}
if (datasetsDecreasedMemoryUsage.size() > 0) {
List decreasedMemoryUsages =
getDatasetMemoryUsages(datasetsDecreasedMemoryUsage);
- addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b);
+ addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b, true);
}
if (datasetsMemoryUsageNoReference.size() > 0) {
- addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b);
+ addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b, false);
}
b.append("\n");
}
@@ -242,22 +242,33 @@ private List getDatasetMemoryUsages(
}
private void addMemoryUsageReport(
- List memoryUsages, String order, StringBuilder b) {
+ List memoryUsages,
+ String order,
+ StringBuilder b,
+ boolean includeDifference) {
b.append(String.format("List of %s datasets(%s).
", MEMORY_USAGE_COMPARE_MAX, order))
- .append("\n")
- .append(
- "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n")
+ .append("\n\n")
.append(
- "|-----------------------------|-------------------|----------------|----------------|----------------|\n");
+ "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |");
+ if (includeDifference) {
+ b.append(" Difference |");
+ }
+ b.append("\n");
+ b.append(
+ "|-----------------------------|-------------------|----------------|----------------|");
+ if (includeDifference) {
+ b.append("----------------|");
+ }
+ b.append("\n");
memoryUsages.stream()
.forEachOrdered(
datasetMemoryUsage -> {
- generateMemoryLogByKey(datasetMemoryUsage, b);
+ generateMemoryLogByKey(datasetMemoryUsage, b, includeDifference);
});
}
private static void generateMemoryLogByKey(
- DatasetMemoryUsage datasetMemoryUsage, StringBuilder b) {
+ DatasetMemoryUsage datasetMemoryUsage, StringBuilder b, boolean includeDifference) {
AtomicBoolean isFirst = new AtomicBoolean(true);
Set keys = new HashSet<>();
keys.addAll(datasetMemoryUsage.getReferenceUsedMemoryByKey().keySet());
@@ -268,21 +279,26 @@ private static void generateMemoryLogByKey(
var reference = datasetMemoryUsage.getReferenceUsedMemoryByKey().get(key);
var latest = datasetMemoryUsage.getLatestUsedMemoryByKey().get(key);
if (isFirst.get()) {
- b.append(String.format("| %s | | | | |\n", datasetMemoryUsage.getDatasetId()));
+ b.append(String.format("| %s | | | |", datasetMemoryUsage.getDatasetId()));
+ if (includeDifference) {
+ b.append(" |");
+ }
+ b.append("\n");
isFirst.set(false);
}
- // if (reference != null && latest != null) {
String usedMemoryDiff = getMemoryDiff(reference, latest);
b.append(
String.format(
- "| | %s | %s | %s | %s |\n",
+ "| | %s | %s | %s |",
key,
reference != null
? MemoryUsage.convertToHumanReadableMemory(reference)
: "N/A",
- latest != null ? MemoryUsage.convertToHumanReadableMemory(latest) : "N/A",
- usedMemoryDiff));
- // }
+ latest != null ? MemoryUsage.convertToHumanReadableMemory(latest) : "N/A"));
+ if (includeDifference) {
+ b.append(String.format(" %s |", usedMemoryDiff));
+ }
+ b.append("\n");
});
}
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
index c1a37f86cc..6cf2ac3a7d 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
@@ -144,9 +144,9 @@ public void addedErrorNotice_summaryString() throws Exception {
+ "\n\n"
+ "\n"
+ "📜 Memory Consumption
\n"
- + "List of 25 datasets(no reference available).
\n"
- + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n"
- + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ + "List of 25 datasets(no reference available).
\n\n"
+ + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n"
+ + "|-----------------------------|-------------------|----------------|----------------|\n"
+ " \n\n");
}
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
index 88803994fd..82c204b20c 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
@@ -92,8 +92,8 @@ public void generateLogString_test() {
+ "📜 Memory Consumption\n"
+ "List of "
+ ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
- + " datasets(memory has increased).
\n"
- + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n"
+ + " datasets(memory has increased).\n\n"
+ + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest | Difference |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ "| feed-id-m2 | | | | |\n"
+ "| | key1 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
@@ -103,8 +103,8 @@ public void generateLogString_test() {
+ "| | key4 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n"
+ "List of "
+ ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
- + " datasets(memory has decreased).
\n"
- + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n"
+ + " datasets(memory has decreased).\n\n"
+ + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest | Difference |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ "| feed-id-m3 | | | | |\n"
+ "| | key3 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n"
@@ -114,12 +114,12 @@ public void generateLogString_test() {
+ "| | key2 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ "List of "
+ ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
- + " datasets(no reference available).
\n"
- + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n"
- + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
- + "| feed-id-m1 | | | | |\n"
- + "| | key1 | -9.54 MiB | N/A | N/A |\n"
- + "| | key2 | 0 bytes | N/A | N/A |\n"
+ + " datasets(no reference available).\n\n"
+ + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n"
+ + "|-----------------------------|-------------------|----------------|----------------|\n"
+ + "| feed-id-m1 | | | |\n"
+ + "| | key1 | -9.54 MiB | N/A |\n"
+ + "| | key2 | 0 bytes | N/A |\n"
+ "\n";
// Assert that the generated log string matches the expected log string
assertThat(logString).isEqualTo(expectedLogString);
From 97678eeaa20b80bfa9f8ffc43935fd821fe9129b Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Thu, 3 Oct 2024 15:49:50 -0400
Subject: [PATCH 15/30] sort feeds on the no reference list and limit them to
25 maximum items
---
.../performance/MemoryUsageRegister.java | 34 +++++++++++++++
.../io/DatasetMemoryUsage.java | 4 ++
...stReportUsedMemoryIncreasedComparator.java | 43 +++++++++++++++++++
.../io/UsedMemoryIncreasedComparator.java | 15 ++++---
.../io/ValidationPerformanceCollector.java | 8 +++-
5 files changed, 96 insertions(+), 8 deletions(-)
create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java
diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java
index e01a71948c..21dcb2d658 100644
--- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java
+++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java
@@ -5,6 +5,7 @@
import java.util.Collections;
import java.util.List;
+/** Register for memory usage snapshots. */
public class MemoryUsageRegister {
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
@@ -16,14 +17,27 @@ private MemoryUsageRegister() {
runtime = Runtime.getRuntime();
}
+ /** @return the singleton instance of the memory usage register. */
public static MemoryUsageRegister getInstance() {
return instance;
}
+ /**
+ * Returns the memory usage registry.
+ *
+ * @return the memory usage registry unmodifiable list.
+ */
public List getRegistry() {
return Collections.unmodifiableList(registry);
}
+ /**
+ * Returns a memory usage snapshot.
+ *
+ * @param key
+ * @param previous
+ * @return
+ */
public MemoryUsage getMemoryUsageSnapshot(String key, MemoryUsage previous) {
Long memoryDiff = null;
if (previous != null) {
@@ -33,23 +47,43 @@ public MemoryUsage getMemoryUsageSnapshot(String key, MemoryUsage previous) {
key, runtime.totalMemory(), runtime.freeMemory(), runtime.maxMemory(), memoryDiff);
}
+ /**
+ * Registers a memory usage snapshot.
+ *
+ * @param key
+ * @return
+ */
public MemoryUsage registerMemoryUsage(String key) {
MemoryUsage memoryUsage = getMemoryUsageSnapshot(key, null);
registerMemoryUsage(memoryUsage);
return memoryUsage;
}
+ /**
+ * Registers a memory usage snapshot.
+ *
+ * @param key
+ * @param previous previous memory usage snapshot used to compute the memory difference between
+ * two snapshots.
+ * @return
+ */
public MemoryUsage registerMemoryUsage(String key, MemoryUsage previous) {
MemoryUsage memoryUsage = getMemoryUsageSnapshot(key, previous);
registerMemoryUsage(memoryUsage);
return memoryUsage;
}
+ /**
+ * Registers a memory usage snapshot.
+ *
+ * @param memoryUsage
+ */
public void registerMemoryUsage(MemoryUsage memoryUsage) {
registry.add(memoryUsage);
logger.atInfo().log(memoryUsage.humanReadablePrint());
}
+ /** Clears the memory usage registry. */
public void clearRegistry() {
registry.clear();
}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
index 452b7763ac..ab4c056ffa 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
@@ -7,6 +7,10 @@
import java.util.stream.Collectors;
import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
+/**
+ * Represents memory usage information for a dataset. This class contains the information associated
+ * with the memory usage of a dataset when running the validation process.
+ */
public class DatasetMemoryUsage {
private String datasetId;
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java
new file mode 100644
index 0000000000..0392da39cb
--- /dev/null
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java
@@ -0,0 +1,43 @@
+package org.mobilitydata.gtfsvalidator.outputcomparator.io;
+
+import java.util.Comparator;
+import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
+
+/**
+ * Comparator to compare two {@link DatasetMemoryUsage} objects based on the difference between the
+ * used memory of the two objects based on the {@link DatasetMemoryUsage#getLatestMemoryUsage}.
+ */
+public class LatestReportUsedMemoryIncreasedComparator implements Comparator {
+
+ @Override
+ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
+ if (o1 == o2) {
+ return 0;
+ }
+ if (o1 == null || o2 == null) {
+ return o1 == null ? -1 : 1;
+ }
+ if (o1.getLatestMemoryUsage() == null && o2.getLatestMemoryUsage() == null) {
+ return 0;
+ }
+ if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) {
+ return o1.getLatestMemoryUsage() == null ? -1 : 1;
+ }
+ long o1MinDiff =
+ o1.getLatestMemoryUsage().stream()
+ .min(Comparator.comparingLong(MemoryUsage::usedMemory))
+ .get()
+ .usedMemory();
+ long o2MinDiff =
+ o2.getLatestMemoryUsage().stream()
+ .min(Comparator.comparingLong(MemoryUsage::usedMemory))
+ .get()
+ .usedMemory();
+ return Long.compare(o1MinDiff, o2MinDiff);
+ }
+
+ @Override
+ public Comparator reversed() {
+ return Comparator.super.reversed();
+ }
+}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
index 9593d89b14..9b37b6bd8e 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
@@ -23,12 +23,18 @@ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
if (o1 == null || o2 == null) {
return o1 == null ? -1 : 1;
}
- if (o1.getReferenceMemoryUsage() == null && o2.getLatestMemoryUsage() == null) {
+ if (o1.getReferenceMemoryUsage() == null
+ && o1.getLatestMemoryUsage() == null
+ && o2.getReferenceMemoryUsage() == null
+ && o2.getLatestMemoryUsage() == null) {
return 0;
}
- if (o1.getReferenceMemoryUsage() == null || o2.getLatestMemoryUsage() == null) {
+ if (o1.getReferenceMemoryUsage() == null || o2.getReferenceMemoryUsage() == null) {
return o1.getReferenceMemoryUsage() == null ? -1 : 1;
}
+ if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) {
+ return o1.getLatestMemoryUsage() == null ? -1 : 1;
+ }
long o1MinDiff =
getMinimumDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey());
long o2MinDiff =
@@ -48,9 +54,4 @@ private long getMinimumDifferenceByKey(
.min()
.orElse(Long.MAX_VALUE);
}
-
- @Override
- public Comparator reversed() {
- return Comparator.super.reversed();
- }
}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index 746d8f53d6..0416e0ebbf 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -227,7 +227,13 @@ public String generateLogString() {
addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b, true);
}
if (datasetsMemoryUsageNoReference.size() > 0) {
- addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b, false);
+ datasetsMemoryUsageNoReference.sort(new LatestReportUsedMemoryIncreasedComparator());
+ addMemoryUsageReport(
+ datasetsMemoryUsageNoReference.subList(
+ 0, Math.min(datasetsMemoryUsageNoReference.size(), MEMORY_USAGE_COMPARE_MAX)),
+ "no reference available",
+ b,
+ false);
}
b.append("\n");
}
From 94b90776a9039d156d0ac60de5d1e386d9b5be37 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Fri, 4 Oct 2024 11:12:25 -0400
Subject: [PATCH 16/30] add documentation and sort memory usage for feed with
no reference
---
docs/ACCEPTANCE_TESTS.md | 16 ++++++++++++++++
...ava => LatestReportUsedMemoryComparator.java} | 16 ++++++++--------
.../io/ValidationPerformanceCollector.java | 2 +-
3 files changed, 25 insertions(+), 9 deletions(-)
rename output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/{LatestReportUsedMemoryIncreasedComparator.java => LatestReportUsedMemoryComparator.java} (68%)
diff --git a/docs/ACCEPTANCE_TESTS.md b/docs/ACCEPTANCE_TESTS.md
index b17d036462..d0d2e1ecc0 100644
--- a/docs/ACCEPTANCE_TESTS.md
+++ b/docs/ACCEPTANCE_TESTS.md
@@ -104,6 +104,22 @@ We follow this process:
+## Performance metrics within the acceptance reports
+
+ There are two main metrics added to the acceptance report comment at the PR level, _Validation Time_ and _Memory Consumption_.
+The performance metrics are **not a blocker** as performance might vary due to external factors including GitHub infrastructure performance.
+However, large jumps in performance values should be investigated before approving a PR.
+
+### Validation Time
+ The validation time consists of general metrics such as average, median, standard deviation, minimums and maximums.
+ These metrics can be affected by the addition of new validators that introduce a penalty in processing time.
+
+### Memory Consumption
+The memory consumption section contains three tables.
+- The first, list the first 25 datasets that the difference increased memory comparing with the main branch.
+- The second, list the first 25 datasets that the difference decreased memory comparing with the main branch.
+- The third, list(not always visible) the first 25 datasets that were not available for comparison as the main branch didn't contain the memory usage information.
+
## Instructions to run the pipeline
1. Provide code changes by creating a new PR on the [GitHub repository](https://github.com/MobilityData/gtfs-validator);
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryComparator.java
similarity index 68%
rename from output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java
rename to output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryComparator.java
index 0392da39cb..89ea4c699a 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryComparator.java
@@ -4,10 +4,10 @@
import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
/**
- * Comparator to compare two {@link DatasetMemoryUsage} objects based on the difference between the
- * used memory of the two objects based on the {@link DatasetMemoryUsage#getLatestMemoryUsage}.
+ * Comparator to compare two {@link DatasetMemoryUsage} objects based on used memory of the two
+ * objects, {@link DatasetMemoryUsage#getLatestMemoryUsage}.
*/
-public class LatestReportUsedMemoryIncreasedComparator implements Comparator {
+public class LatestReportUsedMemoryComparator implements Comparator {
@Override
public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
@@ -23,17 +23,17 @@ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) {
return o1.getLatestMemoryUsage() == null ? -1 : 1;
}
- long o1MinDiff =
+ long o1MaxMemory =
o1.getLatestMemoryUsage().stream()
- .min(Comparator.comparingLong(MemoryUsage::usedMemory))
+ .max(Comparator.comparingLong(MemoryUsage::usedMemory))
.get()
.usedMemory();
- long o2MinDiff =
+ long o2MaxMemory =
o2.getLatestMemoryUsage().stream()
- .min(Comparator.comparingLong(MemoryUsage::usedMemory))
+ .max(Comparator.comparingLong(MemoryUsage::usedMemory))
.get()
.usedMemory();
- return Long.compare(o1MinDiff, o2MinDiff);
+ return Long.compare(o1MaxMemory, o2MaxMemory);
}
@Override
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index 0416e0ebbf..14ce5b0c46 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -227,7 +227,7 @@ public String generateLogString() {
addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b, true);
}
if (datasetsMemoryUsageNoReference.size() > 0) {
- datasetsMemoryUsageNoReference.sort(new LatestReportUsedMemoryIncreasedComparator());
+ datasetsMemoryUsageNoReference.sort(new LatestReportUsedMemoryComparator());
addMemoryUsageReport(
datasetsMemoryUsageNoReference.subList(
0, Math.min(datasetsMemoryUsageNoReference.size(), MEMORY_USAGE_COMPARE_MAX)),
From 98c0275aba1ae9b2000c1413cb84677f41682ad5 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Fri, 4 Oct 2024 11:40:28 -0400
Subject: [PATCH 17/30] Sorting from the highest to the lowest memory usage
---
.../outputcomparator/io/ValidationPerformanceCollector.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index 14ce5b0c46..db7468d00f 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -227,7 +227,8 @@ public String generateLogString() {
addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b, true);
}
if (datasetsMemoryUsageNoReference.size() > 0) {
- datasetsMemoryUsageNoReference.sort(new LatestReportUsedMemoryComparator());
+ // Sorting from the highest to the lowest memory usage
+ datasetsMemoryUsageNoReference.sort((new LatestReportUsedMemoryComparator()).reversed());
addMemoryUsageReport(
datasetsMemoryUsageNoReference.subList(
0, Math.min(datasetsMemoryUsageNoReference.size(), MEMORY_USAGE_COMPARE_MAX)),
From a16fdee3b92ba0a752d8bfb06ab7880aedb15279 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Fri, 4 Oct 2024 11:51:11 -0400
Subject: [PATCH 18/30] improve acceptance tests documentation
---
docs/ACCEPTANCE_TESTS.md | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/docs/ACCEPTANCE_TESTS.md b/docs/ACCEPTANCE_TESTS.md
index d0d2e1ecc0..4360522199 100644
--- a/docs/ACCEPTANCE_TESTS.md
+++ b/docs/ACCEPTANCE_TESTS.md
@@ -120,6 +120,12 @@ The memory consumption section contains three tables.
- The second, list the first 25 datasets that the difference decreased memory comparing with the main branch.
- The third, list(not always visible) the first 25 datasets that were not available for comparison as the main branch didn't contain the memory usage information.
+ Memory usage is collected at critical points and persisted in the JSON report. The added snapshot points are:
+ - _GtfsFeedLoader.loadTables_: This is taken after the validator loads all files.
+ - _GtfsFeedLoader.executeMultiFileValidators_: This is taken after the validator has executed all multi-file validators.
+ - _org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate_: This is taken for the complete load and validation method.
+ - _ValidationRunner.run_: This is taken for the complete run of the validator, excluding report generation.
+
## Instructions to run the pipeline
1. Provide code changes by creating a new PR on the [GitHub repository](https://github.com/MobilityData/gtfs-validator);
From db3632890f8f8bb574760bc446472f0d01fbf853 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Fri, 4 Oct 2024 11:53:41 -0400
Subject: [PATCH 19/30] revert acceptance tests sample running
---
.github/workflows/acceptance_test.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/acceptance_test.yml b/.github/workflows/acceptance_test.yml
index ce9c650fbc..4759f4172e 100644
--- a/.github/workflows/acceptance_test.yml
+++ b/.github/workflows/acceptance_test.yml
@@ -127,7 +127,7 @@ jobs:
- name: Set URL matrix
id: set-matrix
run: |
- DATASETS=$(python3 scripts/mobility-database-harvester/harvest_latest_versions.py -d scripts/mobility-database-harvester/datasets_metadata -l gtfs_latest_versions.json -s)
+ DATASETS=$(python3 scripts/mobility-database-harvester/harvest_latest_versions.py -d scripts/mobility-database-harvester/datasets_metadata -l gtfs_latest_versions.json)
echo $DATASETS
echo "matrix=$DATASETS" >> $GITHUB_OUTPUT
- name: Persist metadata
From 2a7a1f683ac89220a4ae41f8092d75711a28b3ab Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Mon, 7 Oct 2024 16:45:20 -0400
Subject: [PATCH 20/30] remove large feeds from exclude list
---
.../mobility-database-harvester/harvest_latest_versions.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/scripts/mobility-database-harvester/harvest_latest_versions.py b/scripts/mobility-database-harvester/harvest_latest_versions.py
index 0ddef96fba..3d250d854b 100644
--- a/scripts/mobility-database-harvester/harvest_latest_versions.py
+++ b/scripts/mobility-database-harvester/harvest_latest_versions.py
@@ -32,9 +32,9 @@
# Sources to exclude because they are too big for the workflow.
SOURCES_TO_EXCLUDE = [
- "de-unknown-rursee-schifffahrt-kg-gtfs-784",
- "de-unknown-ulmer-eisenbahnfreunde-gtfs-1081",
- "no-unknown-agder-kollektivtrafikk-as-gtfs-1078"
+ # "de-unknown-rursee-schifffahrt-kg-gtfs-784",
+ # "de-unknown-ulmer-eisenbahnfreunde-gtfs-1081",
+ # "no-unknown-agder-kollektivtrafikk-as-gtfs-1078"
]
# Google Cloud constants
From 5f644fc4866f22f0d67b01f95e8dcec72e72d071 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Wed, 9 Oct 2024 13:05:01 -0400
Subject: [PATCH 21/30] fix formatting
---
.../gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java | 1 -
1 file changed, 1 deletion(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
index 1367615056..01accf7864 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java
@@ -1,7 +1,6 @@
package org.mobilitydata.gtfsvalidator.outputcomparator.io;
import com.google.common.flogger.FluentLogger;
-
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
From c7ef80964946371d8eb6a8bf7901bd9525aae7ec Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Wed, 9 Oct 2024 14:44:59 -0400
Subject: [PATCH 22/30] fix ordering
---
.../io/UsedMemoryIncreasedComparator.java | 19 ++++++++++---------
.../io/ValidationPerformanceCollector.java | 2 +-
.../harvest_latest_versions.py | 6 +++---
3 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
index 9b37b6bd8e..af05b3f15b 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
@@ -35,14 +35,15 @@ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) {
return o1.getLatestMemoryUsage() == null ? -1 : 1;
}
- long o1MinDiff =
- getMinimumDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey());
- long o2MinDiff =
- getMinimumDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey());
- return Long.compare(o1MinDiff, o2MinDiff);
+ long o1MaxDiff =
+ getMaxDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey());
+ long o2MaxDiff =
+ getMaxDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey());
+ // Reverse the comparison so that the datasets with the largest memory increase sort first
+ return Long.compare(o2MaxDiff, o1MaxDiff);
}
- private long getMinimumDifferenceByKey(
+ private long getMaxDifferenceByKey(
Map referenceMemoryUsage, Map latestMemoryUsage) {
Set keys = new HashSet<>();
keys.addAll(latestMemoryUsage.keySet());
@@ -50,8 +51,8 @@ private long getMinimumDifferenceByKey(
return keys.stream()
.filter(key -> latestMemoryUsage.containsKey(key) && referenceMemoryUsage.containsKey(key))
.filter(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key) != 0)
- .mapToLong(key -> referenceMemoryUsage.get(key) - latestMemoryUsage.get(key))
- .min()
- .orElse(Long.MAX_VALUE);
+ .mapToLong(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key))
+ .max()
+ .orElse(Long.MIN_VALUE);
}
}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index db7468d00f..850cb0384a 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -8,7 +8,7 @@
public class ValidationPerformanceCollector {
- public static final int MEMORY_USAGE_COMPARE_MAX = 25;
+ public static final int MEMORY_USAGE_COMPARE_MAX = 10;
private final Map referenceTimes;
private final Map latestTimes;
private final BoundedPriorityQueue datasetsDecreasedMemoryUsage;
diff --git a/scripts/mobility-database-harvester/harvest_latest_versions.py b/scripts/mobility-database-harvester/harvest_latest_versions.py
index 3d250d854b..0ddef96fba 100644
--- a/scripts/mobility-database-harvester/harvest_latest_versions.py
+++ b/scripts/mobility-database-harvester/harvest_latest_versions.py
@@ -32,9 +32,9 @@
# Sources to exclude because they are too big for the workflow.
SOURCES_TO_EXCLUDE = [
- # "de-unknown-rursee-schifffahrt-kg-gtfs-784",
- # "de-unknown-ulmer-eisenbahnfreunde-gtfs-1081",
- # "no-unknown-agder-kollektivtrafikk-as-gtfs-1078"
+ "de-unknown-rursee-schifffahrt-kg-gtfs-784",
+ "de-unknown-ulmer-eisenbahnfreunde-gtfs-1081",
+ "no-unknown-agder-kollektivtrafikk-as-gtfs-1078"
]
# Google Cloud constants
From b50f9bff19dd315bc3f8b41fb10e8ca70326c3c6 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Wed, 9 Oct 2024 14:52:22 -0400
Subject: [PATCH 23/30] fix unit test
---
.../outputcomparator/cli/ValidationReportComparatorTest.java | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
index 6cf2ac3a7d..90282a06ff 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
@@ -18,6 +18,7 @@
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;
import org.mobilitydata.gtfsvalidator.notice.ValidationNotice;
import org.mobilitydata.gtfsvalidator.outputcomparator.cli.ValidationReportComparator.Result;
+import org.mobilitydata.gtfsvalidator.outputcomparator.io.ValidationPerformanceCollector;
import org.mobilitydata.gtfsvalidator.outputcomparator.model.SourceUrlContainer;
import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.AcceptanceReport;
import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.AffectedSource;
@@ -144,7 +145,9 @@ public void addedErrorNotice_summaryString() throws Exception {
+ "\n\n"
+ "\n"
+ "📜 Memory Consumption
\n"
- + "List of 25 datasets(no reference available).
\n\n"
+ + "List of "
+ + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
+ + " datasets(no reference available).
\n\n"
+ "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n"
+ "|-----------------------------|-------------------|----------------|----------------|\n"
+ " \n\n");
From 4bd6064872cdbfef5f2bea8519532bf4df35d57e Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Wed, 9 Oct 2024 17:33:00 -0400
Subject: [PATCH 24/30] add decreased memory comparator
---
.../io/BoundedPriorityQueue.java | 56 ------------------
.../io/UsedMemoryDecreasedComparator.java | 57 +++++++++++++++++++
.../io/ValidationPerformanceCollector.java | 52 ++++++-----------
.../cli/ValidationReportComparatorTest.java | 11 +---
4 files changed, 76 insertions(+), 100 deletions(-)
delete mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java
create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java
deleted file mode 100644
index 8e49308ffd..0000000000
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java
+++ /dev/null
@@ -1,56 +0,0 @@
-package org.mobilitydata.gtfsvalidator.outputcomparator.io;
-
-import java.util.Comparator;
-import java.util.PriorityQueue;
-
-/**
- * A bounded priority queue that keeps the N smallest elements. If the queue is full and a new
- * element is offered, the largest element is removed. The smallest element is computed using a
- * comparator or its natural order.
- *
- * @param
- */
-public class BoundedPriorityQueue extends PriorityQueue {
- private final int maxCapacity;
-
- public BoundedPriorityQueue(int maxCapacity) {
- super();
- if (maxCapacity <= 0) {
- throw new IllegalArgumentException("Max capacity must be greater than zero");
- }
- this.maxCapacity = maxCapacity;
- }
-
- public BoundedPriorityQueue(int maxCapacity, int initialCapacity, Comparator comparator) {
- super(initialCapacity, comparator);
- if (maxCapacity <= 0) {
- throw new IllegalArgumentException("Max capacity must be greater than zero");
- }
- this.maxCapacity = maxCapacity;
- }
-
- @Override
- public boolean offer(E e) {
- if (size() >= maxCapacity) {
- E head = peek();
- if (head != null && compare(e, head) > 0) {
- poll();
- } else {
- return false;
- }
- }
- return super.offer(e);
- }
-
- @SuppressWarnings("unchecked")
- private int compare(E a, E b) {
- if (comparator() != null) {
- return comparator().compare(a, b);
- }
- return ((Comparable super E>) a).compareTo(b);
- }
-
- public int getMaxCapacity() {
- return maxCapacity;
- }
-}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java
new file mode 100644
index 0000000000..899e2169ec
--- /dev/null
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java
@@ -0,0 +1,57 @@
+package org.mobilitydata.gtfsvalidator.outputcomparator.io;
+
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Comparator that orders {@link DatasetMemoryUsage} objects so that the datasets whose used
+ * memory decreased the most come first. For every key present in both the reference and the
+ * latest report the difference (latest - reference) is computed; a key present in only one of
+ * them, or whose value did not change, is ignored. Datasets are sorted ascending by their minimum
+ * difference: datasets without any comparable key sort last, while a {@code null} dataset or a
+ * dataset missing one of its reports sorts first.
+ */
+public class UsedMemoryDecreasedComparator implements Comparator<DatasetMemoryUsage> {
+
+  @Override
+  public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
+    if (o1 == o2) {
+      return 0;
+    }
+    if (o1 == null || o2 == null) {
+      return o1 == null ? -1 : 1;
+    }
+    // Nullness is compared symmetrically so that sgn(compare(a, b)) == -sgn(compare(b, a)).
+    boolean o1HasReference = o1.getReferenceMemoryUsage() != null;
+    boolean o2HasReference = o2.getReferenceMemoryUsage() != null;
+    if (o1HasReference != o2HasReference) {
+      return o1HasReference ? 1 : -1;
+    }
+    boolean o1HasLatest = o1.getLatestMemoryUsage() != null;
+    boolean o2HasLatest = o2.getLatestMemoryUsage() != null;
+    if (o1HasLatest != o2HasLatest) {
+      return o1HasLatest ? 1 : -1;
+    }
+    if (!o1HasReference || !o1HasLatest) {
+      return 0; // both lack the same report, so neither can be ranked above the other
+    }
+    return Long.compare(
+        getMinDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey()),
+        getMinDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey()));
+  }
+
+  /** Returns the smallest non-zero (latest - reference) difference over the shared keys. */
+  private long getMinDifferenceByKey(
+      Map<String, Long> referenceMemoryUsage, Map<String, Long> latestMemoryUsage) {
+    Set<String> keys = new HashSet<>(latestMemoryUsage.keySet());
+    keys.retainAll(referenceMemoryUsage.keySet());
+    return keys.stream()
+        .mapToLong(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key))
+        .filter(difference -> difference != 0)
+        .min()
+        // No comparable key: sort this dataset after every dataset with a real difference.
+        .orElse(Long.MAX_VALUE);
+  }
+}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index 850cb0384a..b66c808b4e 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -11,24 +11,14 @@ public class ValidationPerformanceCollector {
public static final int MEMORY_USAGE_COMPARE_MAX = 10;
private final Map referenceTimes;
private final Map latestTimes;
- private final BoundedPriorityQueue datasetsDecreasedMemoryUsage;
- private final BoundedPriorityQueue datasetsIncreasedMemoryUsage;
private final List datasetsMemoryUsageNoReference;
+ private final List datasetsMemoryUsageWithReference;
public ValidationPerformanceCollector() {
this.referenceTimes = new HashMap<>();
this.latestTimes = new HashMap<>();
- this.datasetsDecreasedMemoryUsage =
- new BoundedPriorityQueue<>(
- MEMORY_USAGE_COMPARE_MAX,
- MEMORY_USAGE_COMPARE_MAX,
- (new UsedMemoryIncreasedComparator().reversed()));
- this.datasetsIncreasedMemoryUsage =
- new BoundedPriorityQueue<>(
- MEMORY_USAGE_COMPARE_MAX,
- MEMORY_USAGE_COMPARE_MAX,
- new UsedMemoryIncreasedComparator());
this.datasetsMemoryUsageNoReference = new ArrayList<>();
+ this.datasetsMemoryUsageWithReference = new ArrayList<>();
}
public void addReferenceTime(String sourceId, Double time) {
@@ -211,24 +201,26 @@ public String generateLogString() {
b.append("\n\n");
- if (datasetsIncreasedMemoryUsage.size() > 0
- || datasetsDecreasedMemoryUsage.size() > 0
- || datasetsMemoryUsageNoReference.size() > 0) {
+ if (datasetsMemoryUsageWithReference.size() > 0) {
b.append("\n");
b.append("📜 Memory Consumption
\n");
- if (datasetsIncreasedMemoryUsage.size() > 0) {
- List increasedMemoryUsages =
- getDatasetMemoryUsages(datasetsIncreasedMemoryUsage);
- addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b, true);
- }
- if (datasetsDecreasedMemoryUsage.size() > 0) {
- List decreasedMemoryUsages =
- getDatasetMemoryUsages(datasetsDecreasedMemoryUsage);
- addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b, true);
+ if (datasetsMemoryUsageWithReference.size() > 0) {
+ datasetsMemoryUsageWithReference.sort(new UsedMemoryIncreasedComparator());
+ addMemoryUsageReport(
+ datasetsMemoryUsageWithReference.subList(
+ 0, Math.min(datasetsMemoryUsageWithReference.size(), MEMORY_USAGE_COMPARE_MAX)),
+ "memory has increased",
+ b,
+ true);
+ datasetsMemoryUsageWithReference.sort(new UsedMemoryDecreasedComparator());
+ var decreasedList =
+ datasetsMemoryUsageWithReference.subList(
+ 0, Math.min(datasetsMemoryUsageWithReference.size(), MEMORY_USAGE_COMPARE_MAX));
+ addMemoryUsageReport(decreasedList, "memory has decreased", b, true);
}
if (datasetsMemoryUsageNoReference.size() > 0) {
// Sorting from the highest to the lowest memory usage
- datasetsMemoryUsageNoReference.sort((new LatestReportUsedMemoryComparator()).reversed());
+ datasetsMemoryUsageNoReference.sort(new UsedMemoryDecreasedComparator());
addMemoryUsageReport(
datasetsMemoryUsageNoReference.subList(
0, Math.min(datasetsMemoryUsageNoReference.size(), MEMORY_USAGE_COMPARE_MAX)),
@@ -241,13 +233,6 @@ public String generateLogString() {
return b.toString();
}
- private List getDatasetMemoryUsages(
- BoundedPriorityQueue datasetsMemoryUsage) {
- List increasedMemoryUsages = new ArrayList<>(datasetsMemoryUsage);
- increasedMemoryUsages.sort(datasetsMemoryUsage.comparator());
- return increasedMemoryUsages;
- }
-
private void addMemoryUsageReport(
List memoryUsages,
String order,
@@ -332,8 +317,7 @@ private void compareValidationReportMemoryUsage(
&& referenceReport.getMemoryUsageRecords().size() > 0
&& latestReport.getMemoryUsageRecords() != null
&& latestReport.getMemoryUsageRecords().size() > 0) {
- datasetsIncreasedMemoryUsage.offer(datasetMemoryUsage);
- datasetsDecreasedMemoryUsage.offer(datasetMemoryUsage);
+ datasetsMemoryUsageWithReference.add(datasetMemoryUsage);
} else {
datasetsMemoryUsageNoReference.add(datasetMemoryUsage);
}
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
index 90282a06ff..18e8d66e56 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java
@@ -18,7 +18,6 @@
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;
import org.mobilitydata.gtfsvalidator.notice.ValidationNotice;
import org.mobilitydata.gtfsvalidator.outputcomparator.cli.ValidationReportComparator.Result;
-import org.mobilitydata.gtfsvalidator.outputcomparator.io.ValidationPerformanceCollector;
import org.mobilitydata.gtfsvalidator.outputcomparator.model.SourceUrlContainer;
import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.AcceptanceReport;
import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.AffectedSource;
@@ -142,15 +141,7 @@ public void addedErrorNotice_summaryString() throws Exception {
+ "\n"
+ "| Time Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
- + " \n\n"
- + "\n"
- + "📜 Memory Consumption
\n"
- + "List of "
- + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
- + " datasets(no reference available).
\n\n"
- + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n"
- + "|-----------------------------|-------------------|----------------|----------------|\n"
- + " \n\n");
+ + "\n\n\n");
}
@Test
From 053cdbea6e1b1e59d393f7dc11e54e76eb277b8b Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Thu, 10 Oct 2024 14:48:44 -0400
Subject: [PATCH 25/30] add memory metrics
---
.../io/ValidationPerformanceCollector.java | 276 +++++++++++++-----
.../ValidationPerformanceCollectorTest.java | 44 ++-
2 files changed, 238 insertions(+), 82 deletions(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index b66c808b4e..eaa9621c16 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -2,6 +2,8 @@
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Function;
+import java.util.stream.Collectors;
import org.mobilitydata.gtfsvalidator.model.ValidationReport;
import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.ValidationPerformance;
import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
@@ -9,6 +11,8 @@
public class ValidationPerformanceCollector {
public static final int MEMORY_USAGE_COMPARE_MAX = 10;
+ public static final String MEMORY_PIVOT_KEY =
+ "org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate";
private final Map referenceTimes;
private final Map latestTimes;
private final List datasetsMemoryUsageNoReference;
@@ -29,11 +33,11 @@ public void addLatestTime(String sourceId, Double time) {
latestTimes.put(sourceId, time);
}
- private Double computeAverage(List times) {
+ private Double computeAverage(Collection times) {
return times.stream().mapToDouble(Double::doubleValue).average().orElse(Double.NaN);
}
- private Double computeMedian(List times) {
+ private Double computeMedian(Collection times) {
if (times.isEmpty()) {
return Double.NaN;
}
@@ -49,31 +53,37 @@ private Double computeMedian(List times) {
return median;
}
- private Double computeStandardDeviation(List times) {
+ private Double computeStandardDeviation(Collection times) {
double mean = computeAverage(times);
return Math.sqrt(
times.stream().mapToDouble(time -> Math.pow(time - mean, 2)).average().orElse(Double.NaN));
}
- private Double computeMax(List times) {
+ private Double computeMax(Collection times) {
return times.stream().mapToDouble(Double::doubleValue).max().orElse(Double.NaN);
}
- private Double computeMin(List times) {
+ private Double computeMin(Collection times) {
return times.stream().mapToDouble(Double::doubleValue).min().orElse(Double.NaN);
}
- private String formatMetrics(String metric, String datasetId, Double reference, Double latest) {
+ private String formatMetrics(
+ String metric,
+ String datasetId,
+ Double reference,
+ Double latest,
+ Function render) {
String diff;
if (reference.isNaN() || latest.isNaN()) {
diff = "N/A";
} else {
double difference = latest - reference;
String arrow = difference > 0 ? "⬆️+" : "⬇️";
- diff = String.format("%s%.2f", arrow, difference);
+ diff = String.format("%s%s", arrow, render.apply(difference));
}
return String.format(
- "| %s | %s | %.2f | %.2f | %s |\n", metric, datasetId, reference, latest, diff);
+ "| %s | %s | %s | %s | %s |\n",
+ metric, datasetId, render.apply(reference), render.apply(latest), diff);
}
private static String getMemoryDiff(Long reference, Long latest) {
@@ -124,70 +134,8 @@ public String generateLogString() {
allLatestTimes.add(latestTimes);
}
- if (!allReferenceTimes.isEmpty() && !allLatestTimes.isEmpty()) {
- Double avgReference = computeAverage(allReferenceTimes);
- Double avgLatest = computeAverage(allLatestTimes);
- Double medianReference = computeMedian(allReferenceTimes);
- Double medianLatest = computeMedian(allLatestTimes);
- Double stdDevReference = computeStandardDeviation(allReferenceTimes);
- Double stdDevLatest = computeStandardDeviation(allLatestTimes);
-
- b.append(formatMetrics("Average", "--", avgReference, avgLatest))
- .append(formatMetrics("Median", "--", medianReference, medianLatest))
- .append(formatMetrics("Standard Deviation", "--", stdDevReference, stdDevLatest));
- }
-
- if (!allReferenceTimes.isEmpty()) {
- Double minReference = computeMin(allReferenceTimes);
- String minReferenceId =
- referenceTimes.entrySet().stream()
- .filter(entry -> Objects.equals(entry.getValue(), minReference))
- .map(Map.Entry::getKey)
- .findFirst()
- .orElse("N/A");
-
- Double maxReference = computeMax(allReferenceTimes);
- String maxReferenceId =
- referenceTimes.entrySet().stream()
- .filter(entry -> Objects.equals(entry.getValue(), maxReference))
- .map(Map.Entry::getKey)
- .findFirst()
- .orElse("N/A");
-
- Double minLatest = latestTimes.getOrDefault(minReferenceId, Double.NaN);
- Double maxLatest = latestTimes.getOrDefault(maxReferenceId, Double.NaN);
-
- b.append(
- formatMetrics(
- "Minimum in References Reports", minReferenceId, minReference, minLatest))
- .append(
- formatMetrics(
- "Maximum in Reference Reports", maxReferenceId, maxReference, maxLatest));
- }
-
- if (!allLatestTimes.isEmpty()) {
- Double minLatest = computeMin(allLatestTimes);
- String minLatestId =
- latestTimes.entrySet().stream()
- .filter(entry -> Objects.equals(entry.getValue(), minLatest))
- .map(Map.Entry::getKey)
- .findFirst()
- .orElse("N/A");
-
- Double maxLatest = computeMax(allLatestTimes);
- String maxLatestId =
- latestTimes.entrySet().stream()
- .filter(entry -> Objects.equals(entry.getValue(), maxLatest))
- .map(Map.Entry::getKey)
- .findFirst()
- .orElse("N/A");
-
- Double minReference = referenceTimes.getOrDefault(minLatestId, Double.NaN);
- Double maxReference = referenceTimes.getOrDefault(maxLatestId, Double.NaN);
-
- b.append(formatMetrics("Minimum in Latest Reports", minLatestId, minReference, minLatest))
- .append(formatMetrics("Maximum in Latest Reports", maxLatestId, maxReference, maxLatest));
- }
+ generatePerformanceMetricsLog(
+ referenceTimes, latestTimes, b, value -> String.format("%.2f", value));
// Add warning message for feeds that are missing validation times either in reference or latest
if (!warnings.isEmpty()) {
@@ -202,8 +150,48 @@ public String generateLogString() {
b.append("\n\n");
if (datasetsMemoryUsageWithReference.size() > 0) {
+ Map referenceMemoryUsageById =
+ datasetsMemoryUsageWithReference.stream()
+ .filter(
+ datasetMemoryUsage ->
+ datasetMemoryUsage.getReferenceUsedMemoryByKey().get(MEMORY_PIVOT_KEY)
+ != null)
+ .collect(
+ Collectors.toMap(
+ DatasetMemoryUsage::getDatasetId,
+ datasetMemoryUsage ->
+ datasetMemoryUsage
+ .getReferenceUsedMemoryByKey()
+ .get(MEMORY_PIVOT_KEY)
+ .doubleValue()));
+ Map latestMemoryUsageById =
+ datasetsMemoryUsageWithReference.stream()
+ .filter(
+ datasetMemoryUsage ->
+ datasetMemoryUsage.getLatestUsedMemoryByKey().get(MEMORY_PIVOT_KEY) != null)
+ .collect(
+ Collectors.toMap(
+ DatasetMemoryUsage::getDatasetId,
+ datasetMemoryUsage ->
+ datasetMemoryUsage
+ .getLatestUsedMemoryByKey()
+ .get(MEMORY_PIVOT_KEY)
+ .doubleValue()));
+
b.append("\n");
b.append("📜 Memory Consumption
\n");
+
+ b.append(
+ "| Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n")
+ .append(
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n");
+
+ generatePerformanceMetricsLog(
+ referenceMemoryUsageById,
+ latestMemoryUsageById,
+ b,
+ ValidationPerformanceCollector::convertToHumanReadableMemory);
+
if (datasetsMemoryUsageWithReference.size() > 0) {
datasetsMemoryUsageWithReference.sort(new UsedMemoryIncreasedComparator());
addMemoryUsageReport(
@@ -233,6 +221,76 @@ public String generateLogString() {
return b.toString();
}
+  /**
+   * Appends the aggregate and extreme-value rows of a performance table to {@code b}.
+   *
+   * <p>Average, median and standard deviation rows are written only when both maps are non-empty.
+   * Each minimum/maximum row pairs a dataset's value on one side with the value the same dataset
+   * has on the other side, or {@code NaN} when the dataset is absent there.
+   *
+   * @param references metric value per dataset id, taken from the reference reports
+   * @param latests metric value per dataset id, taken from the latest reports
+   * @param b builder that receives the formatted table rows
+   * @param render converts a metric value to its display string
+   */
+  private void generatePerformanceMetricsLog(
+      Map<String, Double> references,
+      Map<String, Double> latests,
+      StringBuilder b,
+      Function<Double, String> render) {
+    PerformanceMetrics stats = computeMetrics(references, latests);
+    if (!references.isEmpty() && !latests.isEmpty()) {
+      b.append(formatMetrics("Average", "--", stats.avgReference, stats.avgLatest, render))
+          .append(
+              formatMetrics("Median", "--", stats.medianReference, stats.medianLatest, render))
+          .append(
+              formatMetrics(
+                  "Standard Deviation", "--", stats.stdDevReference, stats.stdDevLatest, render));
+    }
+
+    if (!references.isEmpty()) {
+      // Look the counterpart up in the maps passed in, not in the validation-time fields: this
+      // method also renders the memory table, whose values live in different maps.
+      Double minLatest = latests.getOrDefault(stats.minReferenceId, Double.NaN);
+      Double maxLatest = latests.getOrDefault(stats.maxReferenceId, Double.NaN);
+      b.append(
+              formatMetrics(
+                  "Minimum in References Reports",
+                  stats.minReferenceId,
+                  stats.minReference,
+                  minLatest,
+                  render))
+          .append(
+              formatMetrics(
+                  "Maximum in Reference Reports",
+                  stats.maxReferenceId,
+                  stats.maxReference,
+                  maxLatest,
+                  render));
+    }
+
+    if (!latests.isEmpty()) {
+      // Same pairing in the opposite direction: latest extremes against their reference value.
+      Double minReference = references.getOrDefault(stats.minLatestId, Double.NaN);
+      Double maxReference = references.getOrDefault(stats.maxLatestId, Double.NaN);
+
+      b.append(
+              formatMetrics(
+                  "Minimum in Latest Reports",
+                  stats.minLatestId,
+                  minReference,
+                  stats.minLatest,
+                  render))
+          .append(
+              formatMetrics(
+                  "Maximum in Latest Reports",
+                  stats.maxLatestId,
+                  maxReference,
+                  stats.maxLatest,
+                  render));
+    }
+  }
+
private void addMemoryUsageReport(
List memoryUsages,
String order,
@@ -336,4 +394,78 @@ public List toReport() {
}
return affectedSources;
}
+
+  /**
+   * Computes summary statistics for one metric over the reference and latest values.
+   *
+   * <p>Average, median and standard deviation are filled only when both maps are non-empty. The
+   * minimum and maximum of each side are filled when that side is non-empty, together with the id
+   * of the first entry (in map iteration order) holding that value in the same map, or "N/A" if
+   * none matches. Fields that are not filled stay {@code null}.
+   *
+   * @param allReferencesMap metric value per dataset id from the reference reports
+   * @param allLatestsMap metric value per dataset id from the latest reports
+   * @return the populated metrics holder
+   */
+  private PerformanceMetrics computeMetrics(
+      Map<String, Double> allReferencesMap, Map<String, Double> allLatestsMap) {
+    Collection<Double> allReferences = allReferencesMap.values();
+    Collection<Double> allLatest = allLatestsMap.values();
+    PerformanceMetrics metrics = new PerformanceMetrics();
+    if (!allReferences.isEmpty() && !allLatest.isEmpty()) {
+      metrics.avgReference = computeAverage(allReferences);
+      metrics.avgLatest = computeAverage(allLatest);
+      metrics.medianReference = computeMedian(allReferences);
+      metrics.medianLatest = computeMedian(allLatest);
+      metrics.stdDevReference = computeStandardDeviation(allReferences);
+      metrics.stdDevLatest = computeStandardDeviation(allLatest);
+    }
+
+    // The ids are resolved in the maps the extremes were computed from. Resolving them in the
+    // validation-time fields would yield "N/A" (or an unrelated dataset) for any other metric.
+    if (!allReferences.isEmpty()) {
+      metrics.minReference = computeMin(allReferences);
+      metrics.minReferenceId = firstIdWithValue(allReferencesMap, metrics.minReference);
+      metrics.maxReference = computeMax(allReferences);
+      metrics.maxReferenceId = firstIdWithValue(allReferencesMap, metrics.maxReference);
+    }
+    if (!allLatest.isEmpty()) {
+      metrics.minLatest = computeMin(allLatest);
+      metrics.minLatestId = firstIdWithValue(allLatestsMap, metrics.minLatest);
+      metrics.maxLatest = computeMax(allLatest);
+      metrics.maxLatestId = firstIdWithValue(allLatestsMap, metrics.maxLatest);
+    }
+    return metrics;
+  }
+
+  /** Returns the first key of {@code valuesById} mapped to {@code value}, or "N/A" if none. */
+  private static String firstIdWithValue(Map<String, Double> valuesById, Double value) {
+    return valuesById.entrySet().stream()
+        .filter(entry -> Objects.equals(entry.getValue(), value))
+        .map(Map.Entry::getKey)
+        .findFirst()
+        .orElse("N/A");
+  }
+
+  private static String convertToHumanReadableMemory(Double bytes) {
+    long wholeBytes = bytes.longValue(); // the fractional part of the byte count is dropped
+    return MemoryUsage.convertToHumanReadableMemory(wholeBytes);
+  }
+}
+
+class PerformanceMetrics { // mutable holder for the values filled in by computeMetrics
+  Double avgReference; // aggregates: assigned only when both sides have values
+  Double avgLatest;
+  Double medianReference;
+  Double medianLatest;
+  Double stdDevReference;
+  Double stdDevLatest;
+  Double minReference; // extremes of the reference side and the dataset ids they belong to
+  String minReferenceId;
+  Double maxReference;
+  String maxReferenceId;
+  Double minLatest; // extremes of the latest side and the dataset ids they belong to
+  String minLatestId;
+  Double maxLatest;
+  String maxLatestId;
}
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
index 82c204b20c..76ccce5d69 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
@@ -32,7 +32,12 @@ public void generateLogString_test() {
Collections.EMPTY_SET,
null,
Arrays.asList(
- new MemoryUsage("key1", baseMemory, baseMemory + baseMemory * 10, 200, 50L),
+ new MemoryUsage(
+ ValidationPerformanceCollector.MEMORY_PIVOT_KEY,
+ baseMemory,
+ baseMemory + baseMemory * 10,
+ 200,
+ 50L),
new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))),
new ValidationReport(Collections.EMPTY_SET, 16.0, Collections.EMPTY_LIST));
// Memory usage increased as there is less free memory
@@ -42,13 +47,23 @@ public void generateLogString_test() {
Collections.EMPTY_SET,
null,
Arrays.asList(
- new MemoryUsage("key1", baseMemory, baseMemory, 200, 50L),
+ new MemoryUsage(
+ ValidationPerformanceCollector.MEMORY_PIVOT_KEY,
+ baseMemory,
+ baseMemory,
+ 200,
+ 50L),
new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))),
new ValidationReport(
Collections.EMPTY_SET,
null,
Arrays.asList(
- new MemoryUsage("key1", baseMemory, baseMemory - baseMemory / 2, 200, null),
+ new MemoryUsage(
+ ValidationPerformanceCollector.MEMORY_PIVOT_KEY,
+ baseMemory,
+ baseMemory - baseMemory / 2,
+ 200,
+ null),
new MemoryUsage("key2", baseMemory, baseMemory - baseMemory / 2, 200, null))));
// // Memory usage decreased as there is more free memory
@@ -77,12 +92,12 @@ public void generateLogString_test() {
+ "\n"
+ "| Time Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
- + "| Average | -- | 17.00 | 20.00 | ⬆\uFE0F+3.00 |\n"
- + "| Median | -- | 17.00 | 20.00 | ⬆\uFE0F+3.00 |\n"
- + "| Standard Deviation | -- | 3.00 | 2.00 | ⬇\uFE0F-1.00 |\n"
+ + "| Average | -- | 17.00 | 18.67 | ⬆\uFE0F+1.67 |\n"
+ + "| Median | -- | 17.00 | 18.00 | ⬆\uFE0F+1.00 |\n"
+ + "| Standard Deviation | -- | 3.00 | 2.49 | ⬇\uFE0F-0.51 |\n"
+ "| Minimum in References Reports | feed-id-a | 14.00 | 18.00 | ⬆\uFE0F+4.00 |\n"
+ "| Maximum in Reference Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n"
- + "| Minimum in Latest Reports | feed-id-a | 14.00 | 18.00 | ⬆\uFE0F+4.00 |\n"
+ + "| Minimum in Latest Reports | feed-id-m1 | NaN | 16.00 | N/A |\n"
+ "| Maximum in Latest Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n"
+ "#### ⚠️ Warnings\n\n"
+ "The following dataset IDs are missing validation times either in reference or latest:\n"
@@ -90,14 +105,23 @@ public void generateLogString_test() {
+ " \n\n"
+ "\n"
+ "📜 Memory Consumption
\n"
+ + "| Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n"
+ + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ + "| Average | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ + "| Median | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ + "| Standard Deviation | -- | 0 bytes | 0 bytes | ⬇️0 bytes |\n"
+ + "| Minimum in References Reports | N/A | 0 bytes | 0 bytes | N/A |\n"
+ + "| Maximum in Reference Reports | N/A | 0 bytes | 0 bytes | N/A |\n"
+ + "| Minimum in Latest Reports | N/A | 0 bytes | 488.28 KiB | N/A |\n"
+ + "| Maximum in Latest Reports | N/A | 0 bytes | 488.28 KiB | N/A |\n"
+ "List of "
+ ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
+ " datasets(memory has increased).
\n\n"
+ "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest | Difference |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ "| feed-id-m2 | | | | |\n"
- + "| | key1 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ "| | key2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ "| feed-id-m3 | | | | |\n"
+ "| | key3 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n"
+ "| | key4 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n"
@@ -110,16 +134,16 @@ public void generateLogString_test() {
+ "| | key3 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n"
+ "| | key4 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n"
+ "| feed-id-m2 | | | | |\n"
- + "| | key1 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ "| | key2 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ "List of "
+ ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
+ " datasets(no reference available).
\n\n"
+ "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n"
+ "|-----------------------------|-------------------|----------------|----------------|\n"
+ "| feed-id-m1 | | | |\n"
- + "| | key1 | -9.54 MiB | N/A |\n"
+ "| | key2 | 0 bytes | N/A |\n"
+ + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | -9.54 MiB | N/A |\n"
+ " \n";
// Assert that the generated log string matches the expected log string
assertThat(logString).isEqualTo(expectedLogString);
From 2513c234897ef135e1e3e6b774008eb95cf2429c Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Thu, 10 Oct 2024 15:39:17 -0400
Subject: [PATCH 26/30] fix comment formatting
---
.../io/ValidationPerformanceCollector.java | 16 ++++++++--------
.../io/ValidationPerformanceCollectorTest.java | 10 +++++-----
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index eaa9621c16..f62dcdfac7 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -179,7 +179,7 @@ public String generateLogString() {
.doubleValue()));
b.append("\n");
- b.append("📜 Memory Consumption
\n");
+ b.append("📜 Memory Consumption
\n\n");
b.append(
"| Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n")
@@ -396,9 +396,9 @@ public List toReport() {
}
private PerformanceMetrics computeMetrics(
- Map allReferencesMap, Map allLatestsMap) {
- Collection allReferences = allReferencesMap.values();
- Collection allLatest = allLatestsMap.values();
+ Map referencesById, Map latestsById) {
+ Collection allReferences = referencesById.values();
+ Collection allLatest = latestsById.values();
PerformanceMetrics performanceMetrics = new PerformanceMetrics();
if (!allReferences.isEmpty() && !allLatest.isEmpty()) {
performanceMetrics.avgReference = computeAverage(allReferences);
@@ -412,7 +412,7 @@ private PerformanceMetrics computeMetrics(
if (!allReferences.isEmpty()) {
performanceMetrics.minReference = computeMin(allReferences);
performanceMetrics.minReferenceId =
- referenceTimes.entrySet().stream()
+ referencesById.entrySet().stream()
.filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.minReference))
.map(Map.Entry::getKey)
.findFirst()
@@ -420,7 +420,7 @@ private PerformanceMetrics computeMetrics(
performanceMetrics.maxReference = computeMax(allReferences);
performanceMetrics.maxReferenceId =
- referenceTimes.entrySet().stream()
+ referencesById.entrySet().stream()
.filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.maxReference))
.map(Map.Entry::getKey)
.findFirst()
@@ -430,7 +430,7 @@ private PerformanceMetrics computeMetrics(
if (!allLatest.isEmpty()) {
performanceMetrics.minLatest = computeMin(allLatest);
performanceMetrics.minLatestId =
- latestTimes.entrySet().stream()
+ latestsById.entrySet().stream()
.filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.minLatest))
.map(Map.Entry::getKey)
.findFirst()
@@ -438,7 +438,7 @@ private PerformanceMetrics computeMetrics(
performanceMetrics.maxLatest = computeMax(allLatest);
performanceMetrics.maxLatestId =
- latestTimes.entrySet().stream()
+ latestsById.entrySet().stream()
.filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.maxLatest))
.map(Map.Entry::getKey)
.findFirst()
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
index 76ccce5d69..a5ba6fb01b 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
@@ -104,16 +104,16 @@ public void generateLogString_test() {
+ "feed-id-m1\n\n"
+ " \n\n"
+ "\n"
- + "📜 Memory Consumption
\n"
+ + "📜 Memory Consumption
\n\n"
+ "| Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n"
+ "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
+ "| Average | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ "| Median | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ "| Standard Deviation | -- | 0 bytes | 0 bytes | ⬇️0 bytes |\n"
- + "| Minimum in References Reports | N/A | 0 bytes | 0 bytes | N/A |\n"
- + "| Maximum in Reference Reports | N/A | 0 bytes | 0 bytes | N/A |\n"
- + "| Minimum in Latest Reports | N/A | 0 bytes | 488.28 KiB | N/A |\n"
- + "| Maximum in Latest Reports | N/A | 0 bytes | 488.28 KiB | N/A |\n"
+ + "| Minimum in References Reports | feed-id-m2 | 0 bytes | 0 bytes | N/A |\n"
+ + "| Maximum in Reference Reports | feed-id-m2 | 0 bytes | 0 bytes | N/A |\n"
+ + "| Minimum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | N/A |\n"
+ + "| Maximum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | N/A |\n"
+ "List of "
+ ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
+ " datasets(memory has increased).
\n\n"
From 8a304bb083ef96280aafb1794a42164ecf144417 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Thu, 10 Oct 2024 17:30:56 -0400
Subject: [PATCH 27/30] fix invalid references
---
.../io/ValidationPerformanceCollector.java | 8 ++++----
.../io/ValidationPerformanceCollectorTest.java | 8 ++++----
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index f62dcdfac7..f0123987ec 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -252,8 +252,8 @@ private void generatePerformanceMetricsLog(
}
if (!references.isEmpty()) {
- Double minLatest = latestTimes.getOrDefault(performanceMetrics.minReferenceId, Double.NaN);
- Double maxLatest = latestTimes.getOrDefault(performanceMetrics.maxReferenceId, Double.NaN);
+ Double minLatest = latests.getOrDefault(performanceMetrics.minReferenceId, Double.NaN);
+ Double maxLatest = latests.getOrDefault(performanceMetrics.maxReferenceId, Double.NaN);
b.append(
formatMetrics(
"Minimum in References Reports",
@@ -271,8 +271,8 @@ private void generatePerformanceMetricsLog(
}
if (!latests.isEmpty()) {
- Double minReference = referenceTimes.getOrDefault(performanceMetrics.minLatestId, Double.NaN);
- Double maxReference = referenceTimes.getOrDefault(performanceMetrics.maxLatestId, Double.NaN);
+ Double minReference = references.getOrDefault(performanceMetrics.minLatestId, Double.NaN);
+ Double maxReference = references.getOrDefault(performanceMetrics.maxLatestId, Double.NaN);
b.append(
formatMetrics(
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
index a5ba6fb01b..8ffd3c6fb9 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
@@ -110,10 +110,10 @@ public void generateLogString_test() {
+ "| Average | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ "| Median | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
+ "| Standard Deviation | -- | 0 bytes | 0 bytes | ⬇️0 bytes |\n"
- + "| Minimum in References Reports | feed-id-m2 | 0 bytes | 0 bytes | N/A |\n"
- + "| Maximum in Reference Reports | feed-id-m2 | 0 bytes | 0 bytes | N/A |\n"
- + "| Minimum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | N/A |\n"
- + "| Maximum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | N/A |\n"
+ + "| Minimum in References Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ + "| Maximum in Reference Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ + "| Minimum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ + "| Maximum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ "List of "
+ ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
+ " datasets(memory has increased).
\n\n"
From ce54a4d03ddcfb1b0c80ad6b59ed530c45f7b933 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Fri, 11 Oct 2024 10:54:27 -0400
Subject: [PATCH 28/30] remove memory full list
---
.../io/ValidationPerformanceCollector.java | 25 ----------------
.../ValidationPerformanceCollectorTest.java | 30 -------------------
2 files changed, 55 deletions(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index f0123987ec..656a6b017a 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -191,31 +191,6 @@ public String generateLogString() {
latestMemoryUsageById,
b,
ValidationPerformanceCollector::convertToHumanReadableMemory);
-
- if (datasetsMemoryUsageWithReference.size() > 0) {
- datasetsMemoryUsageWithReference.sort(new UsedMemoryIncreasedComparator());
- addMemoryUsageReport(
- datasetsMemoryUsageWithReference.subList(
- 0, Math.min(datasetsMemoryUsageWithReference.size(), MEMORY_USAGE_COMPARE_MAX)),
- "memory has increased",
- b,
- true);
- datasetsMemoryUsageWithReference.sort(new UsedMemoryDecreasedComparator());
- var decreasedList =
- datasetsMemoryUsageWithReference.subList(
- 0, Math.min(datasetsMemoryUsageWithReference.size(), MEMORY_USAGE_COMPARE_MAX));
- addMemoryUsageReport(decreasedList, "memory has decreased", b, true);
- }
- if (datasetsMemoryUsageNoReference.size() > 0) {
- // Sorting from the highest to the lowest memory usage
- datasetsMemoryUsageNoReference.sort(new UsedMemoryDecreasedComparator());
- addMemoryUsageReport(
- datasetsMemoryUsageNoReference.subList(
- 0, Math.min(datasetsMemoryUsageNoReference.size(), MEMORY_USAGE_COMPARE_MAX)),
- "no reference available",
- b,
- false);
- }
b.append(" \n");
}
return b.toString();
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
index 8ffd3c6fb9..12a13e31b8 100644
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
+++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java
@@ -114,36 +114,6 @@ public void generateLogString_test() {
+ "| Maximum in Reference Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ "| Minimum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
+ "| Maximum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
- + "List of "
- + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
- + " datasets(memory has increased).
\n\n"
- + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest | Difference |\n"
- + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
- + "| feed-id-m2 | | | | |\n"
- + "| | key2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
- + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n"
- + "| feed-id-m3 | | | | |\n"
- + "| | key3 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n"
- + "| | key4 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n"
- + "List of "
- + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
- + " datasets(memory has decreased).
\n\n"
- + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest | Difference |\n"
- + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"
- + "| feed-id-m3 | | | | |\n"
- + "| | key3 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n"
- + "| | key4 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n"
- + "| feed-id-m2 | | | | |\n"
- + "| | key2 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
- + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n"
- + "List of "
- + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX
- + " datasets(no reference available).
\n\n"
- + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n"
- + "|-----------------------------|-------------------|----------------|----------------|\n"
- + "| feed-id-m1 | | | |\n"
- + "| | key2 | 0 bytes | N/A |\n"
- + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | -9.54 MiB | N/A |\n"
+ "\n";
// Assert that the generated log string matches the expected log string
assertThat(logString).isEqualTo(expectedLogString);
From b372c2c2d8ebc5fc47a04f123fd0ced6034b293a Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Fri, 11 Oct 2024 11:41:35 -0400
Subject: [PATCH 29/30] delete unused comparators
---
.../io/UsedMemoryDecreasedComparator.java | 57 ------------------
.../io/UsedMemoryIncreasedComparator.java | 58 ------------------
.../MemoryUsageUsedMemoryComparatorTest.java | 60 -------------------
3 files changed, 175 deletions(-)
delete mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java
delete mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
delete mode 100644 output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java
deleted file mode 100644
index 899e2169ec..0000000000
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package org.mobilitydata.gtfsvalidator.outputcomparator.io;
-
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * Comparator to compare two {@link DatasetMemoryUsage} objects based on the difference between the
- * used memory of the two objects. The difference is calculated by comparing the used memory of the
- * two objects for each key present in both objects. If a key is present in one object but not in
- * the other, the key it is ignored. This comparator is used to sort DatasetMemoryUsage by the
- * minimum difference between the used memory of the two. This means the order is by the dataset
- * validation that decreased the memory.
- */
-public class UsedMemoryDecreasedComparator implements Comparator {
-
- @Override
- public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
- if (o1 == o2) {
- return 0;
- }
- if (o1 == null || o2 == null) {
- return o1 == null ? -1 : 1;
- }
- if (o1.getReferenceMemoryUsage() == null
- && o1.getLatestMemoryUsage() == null
- && o2.getReferenceMemoryUsage() == null
- && o2.getLatestMemoryUsage() == null) {
- return 0;
- }
- if (o1.getReferenceMemoryUsage() == null || o2.getReferenceMemoryUsage() == null) {
- return o1.getReferenceMemoryUsage() == null ? -1 : 1;
- }
- if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) {
- return o1.getLatestMemoryUsage() == null ? -1 : 1;
- }
- long o1MinDiff =
- getMinDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey());
- long o2MinDiff =
- getMinDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey());
- return Long.compare(o1MinDiff, o2MinDiff);
- }
-
- private long getMinDifferenceByKey(
- Map referenceMemoryUsage, Map latestMemoryUsage) {
- Set keys = new HashSet<>();
- keys.addAll(latestMemoryUsage.keySet());
- keys.addAll(referenceMemoryUsage.keySet());
- return keys.stream()
- .filter(key -> latestMemoryUsage.containsKey(key) && referenceMemoryUsage.containsKey(key))
- .filter(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key) != 0)
- .mapToLong(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key))
- .max()
- .orElse(Long.MAX_VALUE);
- }
-}
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
deleted file mode 100644
index af05b3f15b..0000000000
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package org.mobilitydata.gtfsvalidator.outputcomparator.io;
-
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * Comparator to compare two {@link DatasetMemoryUsage} objects based on the difference between the
- * used memory of the two objects. The difference is calculated by comparing the used memory of the
- * two objects for each key present in both objects. If a key is present in one object but not in
- * the other, the key it is ignored. This comparator is used to sort DatasetMemoryUsage by the
- * minimum difference between the used memory of the two. This means the order is by the dataset
- * validation that increased the memory.
- */
-public class UsedMemoryIncreasedComparator implements Comparator {
-
- @Override
- public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) {
- if (o1 == o2) {
- return 0;
- }
- if (o1 == null || o2 == null) {
- return o1 == null ? -1 : 1;
- }
- if (o1.getReferenceMemoryUsage() == null
- && o1.getLatestMemoryUsage() == null
- && o2.getReferenceMemoryUsage() == null
- && o2.getLatestMemoryUsage() == null) {
- return 0;
- }
- if (o1.getReferenceMemoryUsage() == null || o2.getReferenceMemoryUsage() == null) {
- return o1.getReferenceMemoryUsage() == null ? -1 : 1;
- }
- if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) {
- return o1.getLatestMemoryUsage() == null ? -1 : 1;
- }
- long o1MaxDiff =
- getMaxDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey());
- long o2MaxDiff =
- getMaxDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey());
- // Reversing the comparison as we need the major memory usage first in a sorted list
- return Long.compare(o2MaxDiff, o1MaxDiff);
- }
-
- private long getMaxDifferenceByKey(
- Map referenceMemoryUsage, Map latestMemoryUsage) {
- Set keys = new HashSet<>();
- keys.addAll(latestMemoryUsage.keySet());
- keys.addAll(referenceMemoryUsage.keySet());
- return keys.stream()
- .filter(key -> latestMemoryUsage.containsKey(key) && referenceMemoryUsage.containsKey(key))
- .filter(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key) != 0)
- .mapToLong(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key))
- .max()
- .orElse(Long.MIN_VALUE);
- }
-}
diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java
deleted file mode 100644
index b09e55387d..0000000000
--- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package org.mobilitydata.gtfsvalidator.outputcomparator.io;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.*;
-import org.junit.Before;
-import org.junit.Test;
-import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
-
-public class MemoryUsageUsedMemoryComparatorTest {
-
- private UsedMemoryIncreasedComparator comparator;
-
- @Before
- public void setUp() {
- comparator = new UsedMemoryIncreasedComparator();
- }
-
- @Test
- public void testCompare_equalMemoryUsage() {
- List referenceMemoryUsage = getMemoryUsage(100L);
- List latestMemoryUsage = getMemoryUsage(100L);
- DatasetMemoryUsage o1 =
- new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage);
- DatasetMemoryUsage o2 =
- new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage);
- assertEquals(0, comparator.compare(o1, o2));
- }
-
- @Test
- public void testCompare_firstHasMoreMemoryDifference() {
- List referenceMemoryUsage = getMemoryUsage(100L);
- List latestMemoryUsage = getMemoryUsage(50L);
- DatasetMemoryUsage o1 =
- new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage);
- DatasetMemoryUsage o2 =
- new DatasetMemoryUsage("dataset1", referenceMemoryUsage, getMemoryUsage(100L));
- assertEquals(-1, comparator.compare(o1, o2));
- }
-
- @Test
- public void testCompare_firstHasLessMemoryDifference() {
- List referenceMemoryUsage = getMemoryUsage(100L);
- List latestMemoryUsage = getMemoryUsage(50L);
- DatasetMemoryUsage o1 =
- new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage);
- DatasetMemoryUsage o2 =
- new DatasetMemoryUsage("dataset1", referenceMemoryUsage, getMemoryUsage(10L));
- assertEquals(1, comparator.compare(o1, o2));
- }
-
- private static List getMemoryUsage(long freeMemory) {
- MemoryUsage[] referenceMemoryUsage =
- new MemoryUsage[] {
- new MemoryUsage("key1", 100L, freeMemory, 100L, 100L),
- new MemoryUsage("key2", 100L, freeMemory, 100L, 100L),
- };
- return Arrays.asList(referenceMemoryUsage);
- }
-}
From f0bf1eb1c85404d81480ad5133bec4e64aca3bd2 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Fri, 11 Oct 2024 11:50:54 -0400
Subject: [PATCH 30/30] delete unused code
---
.../io/ValidationPerformanceCollector.java | 63 -------------------
1 file changed, 63 deletions(-)
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
index 656a6b017a..f4712651a6 100644
--- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
+++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java
@@ -1,7 +1,6 @@
package org.mobilitydata.gtfsvalidator.outputcomparator.io;
import java.util.*;
-import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.mobilitydata.gtfsvalidator.model.ValidationReport;
@@ -10,7 +9,6 @@
public class ValidationPerformanceCollector {
- public static final int MEMORY_USAGE_COMPARE_MAX = 10;
public static final String MEMORY_PIVOT_KEY =
"org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate";
private final Map referenceTimes;
@@ -266,67 +264,6 @@ private void generatePerformanceMetricsLog(
}
}
- private void addMemoryUsageReport(
- List memoryUsages,
- String order,
- StringBuilder b,
- boolean includeDifference) {
- b.append(String.format("List of %s datasets(%s).
", MEMORY_USAGE_COMPARE_MAX, order))
- .append("\n\n")
- .append(
- "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |");
- if (includeDifference) {
- b.append(" Difference |");
- }
- b.append("\n");
- b.append(
- "|-----------------------------|-------------------|----------------|----------------|");
- if (includeDifference) {
- b.append("----------------|");
- }
- b.append("\n");
- memoryUsages.stream()
- .forEachOrdered(
- datasetMemoryUsage -> {
- generateMemoryLogByKey(datasetMemoryUsage, b, includeDifference);
- });
- }
-
- private static void generateMemoryLogByKey(
- DatasetMemoryUsage datasetMemoryUsage, StringBuilder b, boolean includeDifference) {
- AtomicBoolean isFirst = new AtomicBoolean(true);
- Set keys = new HashSet<>();
- keys.addAll(datasetMemoryUsage.getReferenceUsedMemoryByKey().keySet());
- keys.addAll(datasetMemoryUsage.getLatestUsedMemoryByKey().keySet());
- keys.stream()
- .forEach(
- key -> {
- var reference = datasetMemoryUsage.getReferenceUsedMemoryByKey().get(key);
- var latest = datasetMemoryUsage.getLatestUsedMemoryByKey().get(key);
- if (isFirst.get()) {
- b.append(String.format("| %s | | | |", datasetMemoryUsage.getDatasetId()));
- if (includeDifference) {
- b.append(" |");
- }
- b.append("\n");
- isFirst.set(false);
- }
- String usedMemoryDiff = getMemoryDiff(reference, latest);
- b.append(
- String.format(
- "| | %s | %s | %s |",
- key,
- reference != null
- ? MemoryUsage.convertToHumanReadableMemory(reference)
- : "N/A",
- latest != null ? MemoryUsage.convertToHumanReadableMemory(latest) : "N/A"));
- if (includeDifference) {
- b.append(String.format(" %s |", usedMemoryDiff));
- }
- b.append("\n");
- });
- }
-
public void compareValidationReports(
String sourceId, ValidationReport referenceReport, ValidationReport latestReport) {
if (referenceReport.getValidationTimeSeconds() != null) {