From f16f52e0d781f8c5fa99ca2f2e45c9fcd057de63 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:33:06 -0400 Subject: [PATCH 01/30] add memory usage records to the JSON report --- core/build.gradle | 3 + .../performance/MemoryMonitor.java | 12 ++ .../performance/MemoryMonitorAspect.java | 33 ++++++ .../performance/MemoryUsage.java | 69 +++++++++++ .../performance/MemoryUsageRegister.java | 56 +++++++++ .../gtfsvalidator/table/GtfsFeedLoader.java | 109 ++++++++++++------ .../report/JsonReportSummary.java | 3 + .../report/model/FeedMetadata.java | 4 + .../runner/ValidationRunner.java | 12 ++ 9 files changed, 267 insertions(+), 34 deletions(-) create mode 100644 core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java create mode 100644 core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java create mode 100644 core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java create mode 100644 core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java diff --git a/core/build.gradle b/core/build.gradle index cccd38adc9..1e02f6c615 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -18,6 +18,7 @@ plugins { id 'java' id 'maven-publish' id 'signing' + id 'io.freefair.aspectj.post-compile-weaving' version '6.4.1' // Add AspectJ plugin } //publishing { @@ -42,6 +43,8 @@ dependencies { implementation 'com.googlecode.libphonenumber:libphonenumber:8.12.13' implementation 'com.google.flogger:flogger:0.6' implementation 'io.github.classgraph:classgraph:4.8.146' + implementation 'org.aspectj:aspectjrt:1.9.22.1' + implementation 'org.aspectj:aspectjweaver:1.9.22.1' testImplementation 'com.google.flogger:flogger-system-backend:0.6' testImplementation group: 'junit', name: 'junit', version: '4.13' testImplementation "com.google.truth:truth:1.0.1" diff --git 
a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java new file mode 100644 index 0000000000..179ce3cd78 --- /dev/null +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java @@ -0,0 +1,12 @@ +package org.mobilitydata.gtfsvalidator.performance; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +public @interface MemoryMonitor { + String key() default ""; +} diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java new file mode 100644 index 0000000000..f6baf95945 --- /dev/null +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java @@ -0,0 +1,33 @@ +package org.mobilitydata.gtfsvalidator.performance; + +import org.apache.commons.lang3.StringUtils; +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.aspectj.lang.reflect.MethodSignature; + +@Aspect +public class MemoryMonitorAspect { + + // @Around("@annotation(MemoryMonitor)") + @Around("execution(@org.mobilitydata.gtfsvalidator.performance.MemoryMonitor * *(..))") + public Object monitorMemoryUsage(ProceedingJoinPoint joinPoint) throws Throwable { + String key = extractKey(joinPoint); + MemoryUsage before = MemoryUsageRegister.getInstance().getMemoryUsageSnapshot(key, null); + try { + Object result = joinPoint.proceed(); + return result; + } finally { + MemoryUsage after = MemoryUsageRegister.getInstance().getMemoryUsageSnapshot(key, before); + MemoryUsageRegister.getInstance().registerMemoryUsage(after); + } + } + + private String 
extractKey(ProceedingJoinPoint joinPoint) { + var method = ((MethodSignature) joinPoint.getSignature()).getMethod(); + var memoryMonitor = method.getAnnotation(MemoryMonitor.class); + return memoryMonitor != null && StringUtils.isNotBlank(memoryMonitor.key()) + ? memoryMonitor.key() + : method.getDeclaringClass().getCanonicalName() + "." + method.getName(); + } +} diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java new file mode 100644 index 0000000000..fb5982fb49 --- /dev/null +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java @@ -0,0 +1,69 @@ +package org.mobilitydata.gtfsvalidator.performance; + +import com.google.auto.value.AutoValue; +import java.text.DecimalFormat; +import javax.annotation.Nullable; +import org.apache.commons.lang3.StringUtils; + +@AutoValue +public abstract class MemoryUsage { + private static final DecimalFormat TWO_DECIMAL_FORMAT = new DecimalFormat("0.00"); + + public static MemoryUsage create( + String key, long totalMemory, long freeMemory, long maxMemory, Long memoryDiff) { + return new AutoValue_MemoryUsage(key, totalMemory, freeMemory, maxMemory, memoryDiff); + } + + public static String convertToHumanReadableMemory(Long size) { + if (size == null) { + return "N/A"; + } + if (size <= 0) { + return "0"; + } + if (size < 1024) { + return size + " bytes"; + } + if (size < 1048576) { + return TWO_DECIMAL_FORMAT.format(size / 1024.0) + " KiB"; + } + if (size < 1073741824) { + return TWO_DECIMAL_FORMAT.format(size / 1048576.0) + " MiB"; + } + if (size < 1099511627776L) { + return TWO_DECIMAL_FORMAT.format(size / 1073741824.0) + " GiB"; + } + return TWO_DECIMAL_FORMAT.format(size / 1099511627776L) + " TiB"; + } + + public abstract String key(); + + public abstract long totalMemory(); + + public abstract long freeMemory(); + + public abstract long maxMemory(); + + @Nullable + public 
abstract Long diffMemory(); + + public long usedMemory() { + return totalMemory() - freeMemory(); + } + + public String humanReadablePrint() { + StringBuffer result = new StringBuffer(); + result.append("Memory usage registered"); + if (StringUtils.isNotBlank(key())) { + result.append(" for key: ").append(key()); + } else { + result.append(":"); + } + result.append(" Max: ").append(convertToHumanReadableMemory(maxMemory())); + result.append(" Total: ").append(convertToHumanReadableMemory(totalMemory())); + result.append(" Free: ").append(convertToHumanReadableMemory(freeMemory())); + result.append(" Used: ").append(convertToHumanReadableMemory(usedMemory())); + result.append(" Diff: ").append(convertToHumanReadableMemory(diffMemory())); + return result.toString(); + } +} diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java new file mode 100644 index 0000000000..38c20b8baf --- /dev/null +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java @@ -0,0 +1,56 @@ +package org.mobilitydata.gtfsvalidator.performance; + +import com.google.common.flogger.FluentLogger; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class MemoryUsageRegister { + private static final FluentLogger logger = FluentLogger.forEnclosingClass(); + + private static MemoryUsageRegister instance = new MemoryUsageRegister(); + private final Runtime runtime; + private List registry = new ArrayList<>(); + + private MemoryUsageRegister() { + runtime = Runtime.getRuntime(); + } + + public static MemoryUsageRegister getInstance() { + return instance; + } + + public List getRegistry() { + return Collections.unmodifiableList(registry); + } + + public MemoryUsage getMemoryUsageSnapshot(String key, MemoryUsage previous) { + Long memoryDiff = null; + if (previous != null) { + memoryDiff = 
runtime.freeMemory() - previous.freeMemory(); + } + return MemoryUsage.create( + key, runtime.totalMemory(), runtime.freeMemory(), runtime.maxMemory(), memoryDiff); + } + + public MemoryUsage registerMemoryUsage(String key) { + MemoryUsage memoryUsage = getMemoryUsageSnapshot(key, null); + registerMemoryUsage(memoryUsage); + return memoryUsage; + } + + public MemoryUsage registerMemoryUsage(String key, MemoryUsage previous) { + MemoryUsage memoryUsage = getMemoryUsageSnapshot(key, previous); + registerMemoryUsage(memoryUsage); + return memoryUsage; + } + + public void registerMemoryUsage(MemoryUsage memoryUsage) { + registry.add(memoryUsage); + logger.atInfo().log(memoryUsage.humanReadablePrint()); + } + + public void clearRegistry() { + registry.clear(); + } +} diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java index 1edb051184..4dffea4ba0 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java @@ -35,6 +35,8 @@ import org.mobilitydata.gtfsvalidator.notice.RuntimeExceptionInLoaderError; import org.mobilitydata.gtfsvalidator.notice.ThreadExecutionError; import org.mobilitydata.gtfsvalidator.notice.UnknownFileNotice; +import org.mobilitydata.gtfsvalidator.performance.MemoryMonitor; +import org.mobilitydata.gtfsvalidator.performance.MemoryUsageRegister; import org.mobilitydata.gtfsvalidator.validator.FileValidator; import org.mobilitydata.gtfsvalidator.validator.ValidatorProvider; import org.mobilitydata.gtfsvalidator.validator.ValidatorUtil; @@ -90,6 +92,7 @@ public List> getMultiFileValidatorsWithParsingErr } @SuppressWarnings("unchecked") + @MemoryMonitor() public GtfsFeedContainer loadAndValidate( GtfsInput gtfsInput, ValidatorProvider validatorProvider, NoticeContainer noticeContainer) throws InterruptedException { @@ -137,47 +140,85 
@@ public GtfsFeedContainer loadAndValidate( AnyTableLoader.loadMissingFile(tableDescriptor, validatorProvider, noticeContainer)); } try { - for (Future futureContainer : exec.invokeAll(loaderCallables)) { - try { - TableAndNoticeContainers containers = futureContainer.get(); - tableContainers.add(containers.tableContainer); - noticeContainer.addAll(containers.noticeContainer); - } catch (ExecutionException e) { - // All runtime exceptions should be caught above. - // ExecutionException is not expected to happen. - addThreadExecutionError(e, noticeContainer); - } - } + var beforeLoading = + MemoryUsageRegister.getInstance() + .getMemoryUsageSnapshot("GtfsFeedLoader.loadTables", null); + loadTables(noticeContainer, exec, loaderCallables, tableContainers); + MemoryUsageRegister.getInstance() + .registerMemoryUsage("GtfsFeedLoader.loadTables", beforeLoading); + GtfsFeedContainer feed = new GtfsFeedContainer(tableContainers); - List> validatorCallables = new ArrayList<>(); - // Validators with parser-error dependencies will not be returned here, but instead added to - // the skippedValidators list. - for (FileValidator validator : - validatorProvider.createMultiFileValidators( - feed, multiFileValidatorsWithParsingErrors::add)) { - validatorCallables.add( - () -> { - NoticeContainer validatorNotices = new NoticeContainer(); - ValidatorUtil.safeValidate( - validator::validate, validator.getClass(), validatorNotices); - return validatorNotices; - }); - } - for (Future futureContainer : exec.invokeAll(validatorCallables)) { - try { - noticeContainer.addAll(futureContainer.get()); - } catch (ExecutionException e) { - // All runtime exceptions should be caught above. - // ExecutionException is not expected to happen. 
- addThreadExecutionError(e, noticeContainer); - } - } + var beforeMultiFileValidators = + MemoryUsageRegister.getInstance() + .getMemoryUsageSnapshot("GtfsFeedLoader.executeMultiFileValidators", null); + executeMultiFileValidators(validatorProvider, noticeContainer, feed, exec); + MemoryUsageRegister.getInstance() + .registerMemoryUsage( + "GtfsFeedLoader.executeMultiFileValidators", beforeMultiFileValidators); + return feed; } finally { exec.shutdown(); } } + private static void loadTables( + NoticeContainer noticeContainer, + ExecutorService exec, + List> loaderCallables, + ArrayList> tableContainers) + throws InterruptedException { + for (Future futureContainer : exec.invokeAll(loaderCallables)) { + try { + TableAndNoticeContainers containers = futureContainer.get(); + tableContainers.add(containers.tableContainer); + noticeContainer.addAll(containers.noticeContainer); + } catch (ExecutionException e) { + // All runtime exceptions should be caught above. + // ExecutionException is not expected to happen. + addThreadExecutionError(e, noticeContainer); + } + } + } + + private void executeMultiFileValidators( + ValidatorProvider validatorProvider, + NoticeContainer noticeContainer, + GtfsFeedContainer feed, + ExecutorService exec) + throws InterruptedException { + List> validatorCallables = new ArrayList<>(); + // Validators with parser-error dependencies will not be returned here, but instead added to + // the skippedValidators list. 
+ for (FileValidator validator : + validatorProvider.createMultiFileValidators( + feed, multiFileValidatorsWithParsingErrors::add)) { + validatorCallables.add( + () -> { + NoticeContainer validatorNotices = new NoticeContainer(); + ValidatorUtil.safeValidate(validator::validate, validator.getClass(), validatorNotices); + return validatorNotices; + }); + } + collectMultiFileValidationNotices(noticeContainer, exec, validatorCallables); + } + + private static void collectMultiFileValidationNotices( + NoticeContainer noticeContainer, + ExecutorService exec, + List> validatorCallables) + throws InterruptedException { + for (Future futureContainer : exec.invokeAll(validatorCallables)) { + try { + noticeContainer.addAll(futureContainer.get()); + } catch (ExecutionException e) { + // All runtime exceptions should be caught above. + // ExecutionException is not expected to happen. + addThreadExecutionError(e, noticeContainer); + } + } + } + /** Adds a ThreadExecutionError to the notice container. 
*/ private static void addThreadExecutionError( ExecutionException e, NoticeContainer noticeContainer) { diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java index b5ba4ed1a9..4e79ff3faa 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java @@ -6,6 +6,7 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; import org.mobilitydata.gtfsvalidator.report.model.AgencyMetadata; import org.mobilitydata.gtfsvalidator.report.model.FeedMetadata; import org.mobilitydata.gtfsvalidator.runner.ValidationRunnerConfig; @@ -34,6 +35,7 @@ public class JsonReportSummary { private List agencies; private Set files; private Double validationTimeSeconds; + public List memoryUsageRecords; @SerializedName("counts") private JsonReportCounts jsonReportCounts; @@ -67,6 +69,7 @@ public JsonReportSummary( if (feedMetadata.feedInfo != null) { this.feedInfo = new JsonReportFeedInfo(feedMetadata.feedInfo); this.validationTimeSeconds = feedMetadata.validationTimeSeconds; + this.memoryUsageRecords = feedMetadata.memoryUsageRecords; } else { logger.atSevere().log( "No feed info for feed " diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java index b2a4813eee..9292d644c4 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java @@ -8,6 +8,8 @@ import java.time.format.DateTimeFormatter; import java.util.*; import java.util.function.Function; +import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; +import 
org.mobilitydata.gtfsvalidator.performance.MemoryUsageRegister; import org.mobilitydata.gtfsvalidator.table.*; import org.mobilitydata.gtfsvalidator.util.CalendarUtil; import org.mobilitydata.gtfsvalidator.util.ServicePeriod; @@ -54,6 +56,7 @@ public class FeedMetadata { public double validationTimeSeconds; + public List memoryUsageRecords; // List of features that only require checking the presence of one record in the file. private final List> FILE_BASED_FEATURES = List.of( @@ -110,6 +113,7 @@ public static FeedMetadata from(GtfsFeedContainer feedContainer, ImmutableSet Date: Mon, 30 Sep 2024 17:04:51 -0400 Subject: [PATCH 02/30] downgrade aspectj dependecies to be compatible with jdk 11 --- core/build.gradle | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/build.gradle b/core/build.gradle index 1e02f6c615..019cba6bad 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -18,7 +18,7 @@ plugins { id 'java' id 'maven-publish' id 'signing' - id 'io.freefair.aspectj.post-compile-weaving' version '6.4.1' // Add AspectJ plugin + id 'io.freefair.aspectj.post-compile-weaving' version '6.4.1' } //publishing { @@ -43,8 +43,8 @@ dependencies { implementation 'com.googlecode.libphonenumber:libphonenumber:8.12.13' implementation 'com.google.flogger:flogger:0.6' implementation 'io.github.classgraph:classgraph:4.8.146' - implementation 'org.aspectj:aspectjrt:1.9.22.1' - implementation 'org.aspectj:aspectjweaver:1.9.22.1' + implementation 'org.aspectj:aspectjrt:1.9.20' + implementation 'org.aspectj:aspectjweaver:1.9.20' testImplementation 'com.google.flogger:flogger-system-backend:0.6' testImplementation group: 'junit', name: 'junit', version: '4.13' testImplementation "com.google.truth:truth:1.0.1" From 5f6c71b28f2314e53d3b91fdbf5bdf18ed80f1d1 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Tue, 1 Oct 2024 16:57:12 -0400 Subject: [PATCH 03/30] add memory usage to validator comparator --- 
.../io/ValidationReportDeserializer.java | 19 ++- .../gtfsvalidator/model/ValidationReport.java | 14 +- .../io/BoundedPriorityQueue.java | 56 +++++++ .../io/DatasetMemoryUsage.java | 47 ++++++ .../io/MemoryUsageUsedMemoryComparator.java | 30 ++++ .../io/ValidationPerformanceCollector.java | 139 ++++++++++++++++++ .../ValidationPerformanceCollectorTest.java | 65 +++++++- 7 files changed, 356 insertions(+), 14 deletions(-) create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java index 3197be8f81..d088be3285 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java @@ -23,16 +23,13 @@ import com.google.gson.JsonElement; import com.google.gson.JsonObject; import java.lang.reflect.Type; -import java.util.Collection; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import org.mobilitydata.gtfsvalidator.model.NoticeReport; import org.mobilitydata.gtfsvalidator.model.ValidationReport; import org.mobilitydata.gtfsvalidator.notice.Notice; import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; import org.mobilitydata.gtfsvalidator.notice.ResolvedNotice; +import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; /** * Used to (de)serialize a JSON validation report. 
This represents a validation report as a list of @@ -44,6 +41,7 @@ public class ValidationReportDeserializer implements JsonDeserializer memoryUsageRecords = null; if (rootObject.has(SUMMARY_MEMBER_NAME)) { JsonObject summaryObject = rootObject.getAsJsonObject(SUMMARY_MEMBER_NAME); if (summaryObject.has(VALIDATION_TIME_MEMBER_NAME)) { validationTimeSeconds = summaryObject.get(VALIDATION_TIME_MEMBER_NAME).getAsDouble(); } + if (summaryObject.has(MEMORY_USAGE_RECORDS_MEMBER_NAME)) { + JsonArray memoryUsageArray = summaryObject.getAsJsonArray(MEMORY_USAGE_RECORDS_MEMBER_NAME); + memoryUsageRecords = new ArrayList<>(); + for (JsonElement element : memoryUsageArray) { + MemoryUsage memoryUsage = Notice.GSON.fromJson(element, MemoryUsage.class); + memoryUsageRecords.add(memoryUsage); + } + } } JsonArray noticesArray = rootObject.getAsJsonArray(NOTICES_MEMBER_NAME); for (JsonElement childObject : noticesArray) { notices.add(Notice.GSON.fromJson(childObject, NoticeReport.class)); } - return new ValidationReport(notices, validationTimeSeconds); + return new ValidationReport(notices, validationTimeSeconds, memoryUsageRecords); } public static JsonObject serialize( diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java index 5ea4c76a35..c1a3da670e 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java @@ -23,8 +23,10 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Collections; +import java.util.List; import java.util.Set; import org.mobilitydata.gtfsvalidator.io.ValidationReportDeserializer; +import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; /** * Used to (de)serialize a {@code NoticeContainer}. 
This represents a validation report as a list of @@ -42,6 +44,7 @@ public class ValidationReport { .create(); private final Set notices; private final Double validationTimeSeconds; + private List memoryUsageRecords; /** * Public constructor needed for deserialization by {@code ValidationReportDeserializer}. Only @@ -50,7 +53,7 @@ public class ValidationReport { * @param noticeReports set of {@code NoticeReport}s */ public ValidationReport(Set noticeReports) { - this(noticeReports, null); + this(noticeReports, null, null); } /** @@ -60,9 +63,13 @@ public ValidationReport(Set noticeReports) { * @param noticeReports set of {@code NoticeReport}s * @param validationTimeSeconds the time taken to validate the GTFS dataset */ - public ValidationReport(Set noticeReports, Double validationTimeSeconds) { + public ValidationReport( + Set noticeReports, + Double validationTimeSeconds, + List memoryUsageRecords) { this.notices = Collections.unmodifiableSet(noticeReports); this.validationTimeSeconds = validationTimeSeconds; + this.memoryUsageRecords = memoryUsageRecords; } /** @@ -86,6 +93,9 @@ public Double getValidationTimeSeconds() { return validationTimeSeconds; } + public List getMemoryUsageRecords() { + return memoryUsageRecords; + } /** * Determines if two validation reports are equal regardless of the order of the fields in the set * of {@code NoticeReport}. 
/**
 * A priority queue that never holds more than {@code maxCapacity} elements and retains the
 * <em>greatest</em> elements offered to it, as ranked by the comparator or, when none is given, by
 * the elements' natural order.
 *
 * <p>The head of the queue is always the least of the retained elements. Once the queue is full, a
 * newly offered element is accepted only if it ranks strictly greater than the head; the head is
 * then evicted to make room. Elements that rank less than or equal to the head are rejected.
 *
 * @param <E> the type of elements held in this queue
 */
public class BoundedPriorityQueue<E> extends PriorityQueue<E> {
  private final int maxCapacity;

  /**
   * Creates a queue ordered by the natural order of its elements.
   *
   * @param maxCapacity maximum number of retained elements, must be greater than zero
   * @throws IllegalArgumentException if {@code maxCapacity} is not positive
   */
  public BoundedPriorityQueue(int maxCapacity) {
    super();
    this.maxCapacity = requirePositive(maxCapacity);
  }

  /**
   * Creates a queue ordered by the given comparator.
   *
   * @param maxCapacity maximum number of retained elements, must be greater than zero
   * @param initialCapacity initial size of the backing heap, passed to {@link PriorityQueue}
   * @param comparator ordering to use, or {@code null} for natural order
   * @throws IllegalArgumentException if {@code maxCapacity} is not positive, or if {@code
   *     initialCapacity} is rejected by {@link PriorityQueue}
   */
  public BoundedPriorityQueue(
      int maxCapacity, int initialCapacity, Comparator<? super E> comparator) {
    super(initialCapacity, comparator);
    this.maxCapacity = requirePositive(maxCapacity);
  }

  /** Validates the capacity bound shared by both constructors. */
  private static int requirePositive(int maxCapacity) {
    if (maxCapacity <= 0) {
      throw new IllegalArgumentException("Max capacity must be greater than zero");
    }
    return maxCapacity;
  }

  /**
   * Inserts the element if there is room, or if it ranks strictly greater than the current head
   * (in which case the head is evicted first).
   *
   * @param e the element to insert
   * @return {@code true} if the element was added, {@code false} if it was rejected because the
   *     queue is full and the element does not outrank the head
   */
  @Override
  public boolean offer(E e) {
    if (size() >= maxCapacity) {
      E head = peek();
      if (head == null || compare(e, head) <= 0) {
        return false;
      }
      // The head is the least retained element: drop it to make room for the better candidate.
      poll();
    }
    return super.offer(e);
  }

  /** Compares two elements with the queue's comparator, falling back to natural order. */
  @SuppressWarnings("unchecked")
  private int compare(E a, E b) {
    Comparator<? super E> ordering = comparator();
    if (ordering != null) {
      return ordering.compare(a, b);
    }
    // Without a comparator PriorityQueue itself requires elements to be Comparable.
    return ((Comparable<? super E>) a).compareTo(b);
  }

  /** Returns the maximum number of elements this queue retains. */
  public int getMaxCapacity() {
    return maxCapacity;
  }
}
b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java new file mode 100644 index 0000000000..489606ac05 --- /dev/null +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java @@ -0,0 +1,47 @@ +package org.mobilitydata.gtfsvalidator.outputcomparator.io; + +import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; + +public class DatasetMemoryUsage { + + private String datasetId; + private String key; + private MemoryUsage referenceMemoryUsage; + private MemoryUsage latestMemoryUsage; + + public DatasetMemoryUsage( + String datasetId, MemoryUsage referenceMemoryUsage, MemoryUsage latestMemoryUsage) { + this.datasetId = datasetId; + this.key = referenceMemoryUsage != null ? referenceMemoryUsage.key() : latestMemoryUsage.key(); + this.referenceMemoryUsage = referenceMemoryUsage; + this.latestMemoryUsage = latestMemoryUsage; + } + + public String getDatasetId() { + return datasetId; + } + + public void setDatasetId(String datasetId) { + this.datasetId = datasetId; + } + + public MemoryUsage getReferenceMemoryUsage() { + return referenceMemoryUsage; + } + + public void setReferenceMemoryUsage(MemoryUsage referenceMemoryUsage) { + this.referenceMemoryUsage = referenceMemoryUsage; + } + + public MemoryUsage getLatestMemoryUsage() { + return latestMemoryUsage; + } + + public void setLatestMemoryUsage(MemoryUsage latestMemoryUsage) { + this.latestMemoryUsage = latestMemoryUsage; + } + + public String getKey() { + return key; + } +} diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java new file mode 100644 index 0000000000..1145d46b31 --- /dev/null +++ 
b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java @@ -0,0 +1,30 @@ +package org.mobilitydata.gtfsvalidator.outputcomparator.io; + +import java.util.Comparator; + +/** A comparator for MemoryUsage objects that compares them based on the used memory. */ +public class MemoryUsageUsedMemoryComparator implements Comparator { + + @Override + public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { + if (o1 == o2) { + return 0; + } + if (o1 == null || o2 == null) { + return o1 == null ? -1 : 1; + } + if (o1.getReferenceMemoryUsage() == null && o2.getLatestMemoryUsage() == null) { + return 0; + } + if (o1.getReferenceMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { + return o1.getReferenceMemoryUsage() == null ? -1 : 1; + } + if (o1.getReferenceMemoryUsage().usedMemory() < o2.getLatestMemoryUsage().usedMemory()) { + return -1; + } + if (o1.getReferenceMemoryUsage().usedMemory() > o2.getLatestMemoryUsage().usedMemory()) { + return 1; + } + return 0; + } +} diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index eadd1861ee..c6746f3c4d 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -1,17 +1,26 @@ package org.mobilitydata.gtfsvalidator.outputcomparator.io; import java.util.*; +import java.util.stream.Collectors; import org.mobilitydata.gtfsvalidator.model.ValidationReport; import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.ValidationPerformance; +import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; public class ValidationPerformanceCollector { + 
private static final int MEMORY_USAGE_COMPARE_MAX = 20; private final Map referenceTimes; private final Map latestTimes; + private final Map> + largestFirstMemoryUsageBySourceId; + private final Map> + smallestFirstMemoryMapBySourceId; public ValidationPerformanceCollector() { this.referenceTimes = new HashMap<>(); this.latestTimes = new HashMap<>(); + this.largestFirstMemoryUsageBySourceId = new HashMap<>(); + this.smallestFirstMemoryMapBySourceId = new HashMap<>(); } public void addReferenceTime(String sourceId, Double time) { @@ -69,6 +78,21 @@ private String formatMetrics(String metric, String datasetId, Double reference, "| %s | %s | %.2f | %.2f | %s |\n", metric, datasetId, reference, latest, diff); } + private static String getMemoryDiff(Long reference, Long latest) { + String diff; + if (reference == null || latest == null) { + diff = "N/A"; + } else { + long difference = latest - reference; + if (difference == 0) { + return "-"; + } + String arrow = difference > 0 ? "⬆️+" : "⬇️"; + diff = String.format("%s%s", arrow, MemoryUsage.convertToHumanReadableMemory(difference)); + } + return diff; + } + public String generateLogString() { StringBuilder b = new StringBuilder(); b.append("### ⏱️ Performance Assessment\n") @@ -176,11 +200,72 @@ public String generateLogString() { .append(String.join(", ", warnings)) .append("\n\n"); } + + if (smallestFirstMemoryMapBySourceId.size() > 0 + || largestFirstMemoryUsageBySourceId.size() > 0) { + b.append("📜 Memory Consumption\n"); + addMemoryUsageReport(smallestFirstMemoryMapBySourceId, "decreased", b); + addMemoryUsageReport(largestFirstMemoryUsageBySourceId, "increased", b); + } + b.append("\n\n"); return b.toString(); } + private void addMemoryUsageReport( + Map> queueMap, + String order, + StringBuilder b) { + b.append( + String.format( + "

List of %s datasets where memory has %s .

\n", + MEMORY_USAGE_COMPARE_MAX, order)) + .append("\n") + .append( + "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n") + .append( + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"); + + queueMap.keySet().stream() + .forEachOrdered( + sourceId -> { + var pq = queueMap.get(sourceId); + List datasetMemoryUsages = + Arrays.asList(pq.toArray(new DatasetMemoryUsage[pq.size()])); + Collections.sort(datasetMemoryUsages, pq.comparator()); + generateMemoryLogByKey(datasetMemoryUsages, b); + }); + } + + private static void generateMemoryLogByKey( + List memoryIncreases, StringBuilder b) { + memoryIncreases.stream() + .forEachOrdered( + item -> { + String usedMemoryDiff = + getMemoryDiff( + item.getReferenceMemoryUsage() != null + ? item.getReferenceMemoryUsage().usedMemory() + : null, + item.getLatestMemoryUsage() != null + ? item.getLatestMemoryUsage().usedMemory() + : null); + b.append( + String.format( + "| %s | %s | %s | %s | %s |\n", + item.getKey(), + item.getDatasetId(), + item.getReferenceMemoryUsage() != null + ? item.getReferenceMemoryUsage().usedMemory() + : "-", + item.getLatestMemoryUsage() != null + ? item.getLatestMemoryUsage().usedMemory() + : "-", + usedMemoryDiff)); + }); + } + public void compareValidationReports( String sourceId, ValidationReport referenceReport, ValidationReport latestReport) { if (referenceReport.getValidationTimeSeconds() != null) { @@ -189,6 +274,60 @@ public void compareValidationReports( if (latestReport.getValidationTimeSeconds() != null) { addLatestTime(sourceId, latestReport.getValidationTimeSeconds()); } + + compareValidationReportMemoryUsage(sourceId, referenceReport, latestReport); + } + + private void compareValidationReportMemoryUsage( + String sourceId, ValidationReport referenceReport, ValidationReport latestReport) { + Set keys = + referenceReport.getMemoryUsageRecords() != null + ? 
referenceReport.getMemoryUsageRecords().stream() + .map(MemoryUsage::key) + .collect(Collectors.toSet()) + : Collections.EMPTY_SET; + if (latestReport.getMemoryUsageRecords() != null) { + keys.addAll( + latestReport.getMemoryUsageRecords().stream() + .map(MemoryUsage::key) + .collect(Collectors.toSet())); + } + Map referenceMap = + referenceReport.getMemoryUsageRecords() != null + ? referenceReport.getMemoryUsageRecords().stream() + .collect(Collectors.toMap(MemoryUsage::key, memoryUsage -> memoryUsage)) + : new HashMap<>(); + Map latestMap = + referenceReport.getMemoryUsageRecords() != null + ? latestReport.getMemoryUsageRecords().stream() + .collect(Collectors.toMap(MemoryUsage::key, memoryUsage -> memoryUsage)) + : new HashMap<>(); + keys.stream() + .forEachOrdered( + key -> { + var datasetMemoryUsage = + new DatasetMemoryUsage(sourceId, referenceMap.get(key), latestMap.get(key)); + BoundedPriorityQueue decreasingQueue = + largestFirstMemoryUsageBySourceId.get(sourceId); + BoundedPriorityQueue increasingQueue = + smallestFirstMemoryMapBySourceId.get(sourceId); + if (decreasingQueue == null) { + decreasingQueue = + new BoundedPriorityQueue<>( + MEMORY_USAGE_COMPARE_MAX, + 2, + (new MemoryUsageUsedMemoryComparator()).reversed()); + largestFirstMemoryUsageBySourceId.put(sourceId, decreasingQueue); + increasingQueue = + new BoundedPriorityQueue<>( + MEMORY_USAGE_COMPARE_MAX, 2, new MemoryUsageUsedMemoryComparator()); + smallestFirstMemoryMapBySourceId.put(sourceId, increasingQueue); + } + if (referenceMap.containsKey(key) || latestMap.containsKey(key)) { + increasingQueue.offer(datasetMemoryUsage); + decreasingQueue.offer(datasetMemoryUsage); + } + }); } public List toReport() { diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java index 962331fc32..f5c4971056 
100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java @@ -2,7 +2,11 @@ import static com.google.common.truth.Truth.assertThat; +import java.util.Arrays; +import java.util.Collections; import org.junit.Test; +import org.mobilitydata.gtfsvalidator.model.ValidationReport; +import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; public class ValidationPerformanceCollectorTest { @@ -11,13 +15,43 @@ public void generateLogString_test() { ValidationPerformanceCollector collector = new ValidationPerformanceCollector(); // Adding some sample data - collector.addReferenceTime("feed-id-a", 12.0); - collector.addReferenceTime("feed-id-a", 14.0); - collector.addLatestTime("feed-id-a", 16.0); - collector.addLatestTime("feed-id-a", 18.0); + long baseMemory = 1000000; + // Memory usage latest null + collector.compareValidationReports( + "feed-id-a", + new ValidationReport( + Collections.EMPTY_SET, + 12.0, + Arrays.asList( + MemoryUsage.create("key1", baseMemory, baseMemory, 200, 50L), + MemoryUsage.create("key2", baseMemory, baseMemory, 200, 50L))), + new ValidationReport(Collections.EMPTY_SET, 16.0, Collections.EMPTY_LIST)); + // Memory usage decreased + collector.compareValidationReports( + "feed-id-a", + new ValidationReport( + Collections.EMPTY_SET, + 14.0, + Arrays.asList( + MemoryUsage.create("key3", baseMemory, baseMemory - 1000, 200, 50L), + MemoryUsage.create("key4", baseMemory, baseMemory - 1000, 200, 50L))), + new ValidationReport( + Collections.EMPTY_SET, + 18.0, + Arrays.asList( + MemoryUsage.create("key3", baseMemory, baseMemory - baseMemory / 2, 200, null), + MemoryUsage.create("key4", baseMemory, baseMemory - baseMemory / 2, 200, null)))); - collector.addReferenceTime("feed-id-b", 20.0); - collector.addLatestTime("feed-id-b", 22.0); + // Memory usage 
decreased + collector.compareValidationReports( + "feed-id-b", + new ValidationReport( + Collections.EMPTY_SET, + 20.0, + Arrays.asList( + MemoryUsage.create("key3", baseMemory, baseMemory * 2, 200, null), + MemoryUsage.create("key4", baseMemory, baseMemory * 2, 200, null))), + new ValidationReport(Collections.EMPTY_SET, 22.0, Collections.EMPTY_LIST)); // Generating the log string String logString = collector.generateLogString(); @@ -37,6 +71,25 @@ public void generateLogString_test() { + "| Maximum in Reference Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n" + "| Minimum in Latest Reports | feed-id-a | 14.00 | 18.00 | ⬆\uFE0F+4.00 |\n" + "| Maximum in Latest Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n" + + "📜 Memory Consumption\n" + + "

List of 20 datasets where memory has decreased .

\n\n" + + "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + + "| key1 | feed-id-a | 0 | - | N/A |\n" + + "| key2 | feed-id-a | 0 | - | N/A |\n" + + "| key4 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n" + + "| key3 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n" + + "| key3 | feed-id-b | -1000000 | - | N/A |\n" + + "| key4 | feed-id-b | -1000000 | - | N/A |\n" + + "

List of 20 datasets where memory has increased .

\n\n" + + "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + + "| key3 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n" + + "| key4 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n" + + "| key1 | feed-id-a | 0 | - | N/A |\n" + + "| key2 | feed-id-a | 0 | - | N/A |\n" + + "| key3 | feed-id-b | -1000000 | - | N/A |\n" + + "| key4 | feed-id-b | -1000000 | - | N/A |\n" + "\n\n"; // Assert that the generated log string matches the expected log string assertThat(logString).isEqualTo(expectedLogString); From 848d798d956c9c464d3264df28e2f96138875900 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Tue, 1 Oct 2024 16:59:13 -0400 Subject: [PATCH 04/30] run acceptance tests with sample data --- .github/workflows/acceptance_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/acceptance_test.yml b/.github/workflows/acceptance_test.yml index 4759f4172e..ce9c650fbc 100644 --- a/.github/workflows/acceptance_test.yml +++ b/.github/workflows/acceptance_test.yml @@ -127,7 +127,7 @@ jobs: - name: Set URL matrix id: set-matrix run: | - DATASETS=$(python3 scripts/mobility-database-harvester/harvest_latest_versions.py -d scripts/mobility-database-harvester/datasets_metadata -l gtfs_latest_versions.json) + DATASETS=$(python3 scripts/mobility-database-harvester/harvest_latest_versions.py -d scripts/mobility-database-harvester/datasets_metadata -l gtfs_latest_versions.json -s) echo $DATASETS echo "matrix=$DATASETS" >> $GITHUB_OUTPUT - name: Persist metadata From e89816ced5c46f7294481997f2d4f5661b1e4a64 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Tue, 1 Oct 2024 22:01:09 -0400 Subject: [PATCH 05/30] fix memory usage serialization --- .../performance/MemoryUsage.java | 142 +++++++++++++++--- 
.../performance/MemoryUsageRegister.java | 4 +- .../io/DatasetMemoryUsage.java | 3 +- .../io/ValidationPerformanceCollector.java | 8 +- .../ValidationPerformanceCollectorTest.java | 16 +- 5 files changed, 133 insertions(+), 40 deletions(-) diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java index fb5982fb49..4e3b01b9b1 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java @@ -1,17 +1,26 @@ package org.mobilitydata.gtfsvalidator.performance; -import com.google.auto.value.AutoValue; import java.text.DecimalFormat; -import javax.annotation.Nullable; import org.apache.commons.lang3.StringUtils; -@AutoValue -public abstract class MemoryUsage { +public class MemoryUsage { private static final DecimalFormat TWO_DECIMAL_FORMAT = new DecimalFormat("0.00"); - public static MemoryUsage create( - String key, long totalMemory, long freeMemory, long maxMemory, Long memoryDiff) { - return new AutoValue_MemoryUsage(key, totalMemory, freeMemory, maxMemory, memoryDiff); + private String key; + private long totalMemory; + private long freeMemory; + private long maxMemory; + private Long diffMemory; + + public MemoryUsage() {} + + public MemoryUsage( + String key, long totalMemory, long freeMemory, long maxMemory, Long diffMemory) { + this.key = key; + this.totalMemory = totalMemory; + this.freeMemory = freeMemory; + this.maxMemory = maxMemory; + this.diffMemory = diffMemory; } public static String convertToHumanReadableMemory(Long size) { @@ -36,34 +45,117 @@ public static String convertToHumanReadableMemory(Long size) { return TWO_DECIMAL_FORMAT.format(size / 1099511627776L) + " TiB"; } - public abstract String key(); - - public abstract long totalMemory(); - - public abstract long freeMemory(); - - public abstract long maxMemory(); - - 
@Nullable - public abstract Long diffMemory(); - public long usedMemory() { - return totalMemory() - freeMemory(); + return totalMemory - freeMemory; } public String humanReadablePrint() { StringBuffer result = new StringBuffer(); result.append("Memory usage registered"); - if (StringUtils.isNotBlank(key())) { - result.append(" for key: ").append(key()); + if (StringUtils.isNotBlank(key)) { + result.append(" for key: ").append(key); } else { result.append(":"); } - result.append(" Max: ").append(convertToHumanReadableMemory(maxMemory())); - result.append(" Total: ").append(convertToHumanReadableMemory(totalMemory())); - result.append(" Free: ").append(convertToHumanReadableMemory(freeMemory())); + result.append(" Max: ").append(convertToHumanReadableMemory(maxMemory)); + result.append(" Total: ").append(convertToHumanReadableMemory(totalMemory)); + result.append(" Free: ").append(convertToHumanReadableMemory(freeMemory)); result.append(" Used: ").append(convertToHumanReadableMemory(usedMemory())); - result.append(" Diff: ").append(convertToHumanReadableMemory(diffMemory())); + result.append(" Diff: ").append(convertToHumanReadableMemory(diffMemory)); return result.toString(); } + + public String getKey() { + return key; + } + + public void setKey(String key) { + this.key = key; + } + + public long getTotalMemory() { + return totalMemory; + } + + public void setTotalMemory(long totalMemory) { + this.totalMemory = totalMemory; + } + + public long getFreeMemory() { + return freeMemory; + } + + public void setFreeMemory(long freeMemory) { + this.freeMemory = freeMemory; + } + + public long getMaxMemory() { + return maxMemory; + } + + public void setMaxMemory(long maxMemory) { + this.maxMemory = maxMemory; + } + + public Long getDiffMemory() { + return diffMemory; + } + + public void setDiffMemory(Long diffMemory) { + this.diffMemory = diffMemory; + } + + @Override + public String toString() { + return "MemoryUsage{" + + "key=" + + key + + ", " + + "totalMemory=" + + 
totalMemory + + ", " + + "freeMemory=" + + freeMemory + + ", " + + "maxMemory=" + + maxMemory + + ", " + + "diffMemory=" + + diffMemory + + "}"; + } + + @Override + public boolean equals(Object o) { + if (o == this) { + return true; + } + if (o instanceof MemoryUsage) { + MemoryUsage that = (MemoryUsage) o; + return this.key.equals(that.getKey()) + && this.totalMemory == that.getTotalMemory() + && this.freeMemory == that.getFreeMemory() + && this.maxMemory == that.getMaxMemory() + && (this.diffMemory == null + ? that.getDiffMemory() == null + : this.getDiffMemory().equals(that.getDiffMemory())); + } + return false; + } + + @Override + public int hashCode() { + int h$ = 1; + h$ *= 1000003; + h$ ^= key.hashCode(); + h$ *= 1000003; + h$ ^= (int) ((totalMemory >>> 32) ^ totalMemory); + h$ *= 1000003; + h$ ^= (int) ((freeMemory >>> 32) ^ freeMemory); + h$ *= 1000003; + h$ ^= (int) ((maxMemory >>> 32) ^ maxMemory); + h$ *= 1000003; + h$ ^= (diffMemory == null) ? 0 : diffMemory.hashCode(); + return h$; + } } diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java index 38c20b8baf..e01a71948c 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java @@ -27,9 +27,9 @@ public List getRegistry() { public MemoryUsage getMemoryUsageSnapshot(String key, MemoryUsage previous) { Long memoryDiff = null; if (previous != null) { - memoryDiff = runtime.freeMemory() - previous.freeMemory(); + memoryDiff = runtime.freeMemory() - previous.getFreeMemory(); } - return MemoryUsage.create( + return new MemoryUsage( key, runtime.totalMemory(), runtime.freeMemory(), runtime.maxMemory(), memoryDiff); } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java 
b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java index 489606ac05..ca99d20684 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java @@ -12,7 +12,8 @@ public class DatasetMemoryUsage { public DatasetMemoryUsage( String datasetId, MemoryUsage referenceMemoryUsage, MemoryUsage latestMemoryUsage) { this.datasetId = datasetId; - this.key = referenceMemoryUsage != null ? referenceMemoryUsage.key() : latestMemoryUsage.key(); + this.key = + referenceMemoryUsage != null ? referenceMemoryUsage.getKey() : latestMemoryUsage.getKey(); this.referenceMemoryUsage = referenceMemoryUsage; this.latestMemoryUsage = latestMemoryUsage; } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index c6746f3c4d..45f57ec6c2 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -283,24 +283,24 @@ private void compareValidationReportMemoryUsage( Set keys = referenceReport.getMemoryUsageRecords() != null ? referenceReport.getMemoryUsageRecords().stream() - .map(MemoryUsage::key) + .map(MemoryUsage::getKey) .collect(Collectors.toSet()) : Collections.EMPTY_SET; if (latestReport.getMemoryUsageRecords() != null) { keys.addAll( latestReport.getMemoryUsageRecords().stream() - .map(MemoryUsage::key) + .map(MemoryUsage::getKey) .collect(Collectors.toSet())); } Map referenceMap = referenceReport.getMemoryUsageRecords() != null ? 
referenceReport.getMemoryUsageRecords().stream() - .collect(Collectors.toMap(MemoryUsage::key, memoryUsage -> memoryUsage)) + .collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage)) : new HashMap<>(); Map latestMap = referenceReport.getMemoryUsageRecords() != null ? latestReport.getMemoryUsageRecords().stream() - .collect(Collectors.toMap(MemoryUsage::key, memoryUsage -> memoryUsage)) + .collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage)) : new HashMap<>(); keys.stream() .forEachOrdered( diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java index f5c4971056..da33a6ccc5 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java @@ -23,8 +23,8 @@ public void generateLogString_test() { Collections.EMPTY_SET, 12.0, Arrays.asList( - MemoryUsage.create("key1", baseMemory, baseMemory, 200, 50L), - MemoryUsage.create("key2", baseMemory, baseMemory, 200, 50L))), + new MemoryUsage("key1", baseMemory, baseMemory, 200, 50L), + new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))), new ValidationReport(Collections.EMPTY_SET, 16.0, Collections.EMPTY_LIST)); // Memory usage decreased collector.compareValidationReports( @@ -33,14 +33,14 @@ public void generateLogString_test() { Collections.EMPTY_SET, 14.0, Arrays.asList( - MemoryUsage.create("key3", baseMemory, baseMemory - 1000, 200, 50L), - MemoryUsage.create("key4", baseMemory, baseMemory - 1000, 200, 50L))), + new MemoryUsage("key3", baseMemory, baseMemory - 1000, 200, 50L), + new MemoryUsage("key4", baseMemory, baseMemory - 1000, 200, 50L))), new ValidationReport( 
Collections.EMPTY_SET, 18.0, Arrays.asList( - MemoryUsage.create("key3", baseMemory, baseMemory - baseMemory / 2, 200, null), - MemoryUsage.create("key4", baseMemory, baseMemory - baseMemory / 2, 200, null)))); + new MemoryUsage("key3", baseMemory, baseMemory - baseMemory / 2, 200, null), + new MemoryUsage("key4", baseMemory, baseMemory - baseMemory / 2, 200, null)))); // Memory usage decreased collector.compareValidationReports( @@ -49,8 +49,8 @@ public void generateLogString_test() { Collections.EMPTY_SET, 20.0, Arrays.asList( - MemoryUsage.create("key3", baseMemory, baseMemory * 2, 200, null), - MemoryUsage.create("key4", baseMemory, baseMemory * 2, 200, null))), + new MemoryUsage("key3", baseMemory, baseMemory * 2, 200, null), + new MemoryUsage("key4", baseMemory, baseMemory * 2, 200, null))), new ValidationReport(Collections.EMPTY_SET, 22.0, Collections.EMPTY_LIST)); // Generating the log string From b66828fa41069a327b36f900e83dd088bb8c7fbf Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Wed, 2 Oct 2024 09:23:06 -0400 Subject: [PATCH 06/30] fix performance collector --- .../outputcomparator/io/ValidationPerformanceCollector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index 45f57ec6c2..eb4d4e4153 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -285,7 +285,7 @@ private void compareValidationReportMemoryUsage( ? 
referenceReport.getMemoryUsageRecords().stream() .map(MemoryUsage::getKey) .collect(Collectors.toSet()) - : Collections.EMPTY_SET; + : new HashSet<>(); if (latestReport.getMemoryUsageRecords() != null) { keys.addAll( latestReport.getMemoryUsageRecords().stream() From 0416d1cba56324dcd9fe73f1d91d0e491c9ff493 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:08:59 -0400 Subject: [PATCH 07/30] fix npe --- .../outputcomparator/io/DatasetMemoryUsage.java | 6 ++++-- .../outputcomparator/io/ValidationPerformanceCollector.java | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java index ca99d20684..c837d509f9 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java @@ -12,8 +12,10 @@ public class DatasetMemoryUsage { public DatasetMemoryUsage( String datasetId, MemoryUsage referenceMemoryUsage, MemoryUsage latestMemoryUsage) { this.datasetId = datasetId; - this.key = - referenceMemoryUsage != null ? referenceMemoryUsage.getKey() : latestMemoryUsage.getKey(); + this.key = referenceMemoryUsage != null ? referenceMemoryUsage.getKey() : null; + if (key == null) { + this.key = latestMemoryUsage.getKey() != null ? 
latestMemoryUsage.getKey() : null; + } this.referenceMemoryUsage = referenceMemoryUsage; this.latestMemoryUsage = latestMemoryUsage; } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index eb4d4e4153..74319c2cd0 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -298,7 +298,7 @@ private void compareValidationReportMemoryUsage( .collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage)) : new HashMap<>(); Map latestMap = - referenceReport.getMemoryUsageRecords() != null + latestReport.getMemoryUsageRecords() != null ? latestReport.getMemoryUsageRecords().stream() .collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage)) : new HashMap<>(); From 0cc18335bcc200fc06cc2c844eb8057f77a6e6c1 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Wed, 2 Oct 2024 11:22:28 -0400 Subject: [PATCH 08/30] support negative memory usage for logging --- .../gtfsvalidator/performance/MemoryUsage.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java index 4e3b01b9b1..f0126c23e9 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java @@ -23,26 +23,24 @@ public MemoryUsage( this.diffMemory = diffMemory; } - public static String convertToHumanReadableMemory(Long size) { - if (size == null) { + 
public static String convertToHumanReadableMemory(Long bytes) { + if (bytes == null) { return "N/A"; } - if (size <= 0) { - return "0"; - } + long size = Math.abs(bytes); if (size < 1024) { - return size + " bytes"; + return bytes + " bytes"; } if (size < 1048576) { - return TWO_DECIMAL_FORMAT.format(size / 1024.0) + " KiB"; + return TWO_DECIMAL_FORMAT.format(Math.copySign(size / 1024.0, bytes)) + " KiB"; } if (size < 1073741824) { - return TWO_DECIMAL_FORMAT.format(size / 1048576.0) + " MiB"; + return TWO_DECIMAL_FORMAT.format(Math.copySign(size / 1048576.0, bytes)) + " MiB"; } if (size < 1099511627776L) { - return TWO_DECIMAL_FORMAT.format(size / 1073741824.0) + " GiB"; + return TWO_DECIMAL_FORMAT.format(Math.copySign(size / 1073741824.0, bytes)) + " GiB"; } - return TWO_DECIMAL_FORMAT.format(size / 1099511627776L) + " TiB"; + return TWO_DECIMAL_FORMAT.format(Math.copySign(size / 1099511627776L, bytes)) + " TiB"; } public long usedMemory() { From f6789c86ac29931c223b8a1f4a09a86e3188c63a Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Wed, 2 Oct 2024 16:42:33 -0400 Subject: [PATCH 09/30] simplify memory usage report --- .../io/DatasetMemoryUsage.java | 46 +++-- .../io/MemoryUsageUsedMemoryComparator.java | 30 ---- .../io/UsedMemoryIncreasedComparator.java | 57 ++++++ .../io/ValidationPerformanceCollector.java | 164 +++++++----------- .../MemoryUsageUsedMemoryComparatorTest.java | 60 +++++++ .../ValidationPerformanceCollectorTest.java | 88 ++++++---- 6 files changed, 268 insertions(+), 177 deletions(-) delete mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java create mode 100644 output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java 
diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java index c837d509f9..452b7763ac 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java @@ -1,23 +1,37 @@ package org.mobilitydata.gtfsvalidator.outputcomparator.io; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; public class DatasetMemoryUsage { private String datasetId; - private String key; - private MemoryUsage referenceMemoryUsage; - private MemoryUsage latestMemoryUsage; + private List referenceMemoryUsage; + private List latestMemoryUsage; + private Map referenceUsedMemoryByKey = Collections.unmodifiableMap(new HashMap<>()); + private Map latestUsedMemoryByKey = Collections.unmodifiableMap(new HashMap<>()); public DatasetMemoryUsage( - String datasetId, MemoryUsage referenceMemoryUsage, MemoryUsage latestMemoryUsage) { + String datasetId, + List referenceMemoryUsage, + List latestMemoryUsage) { this.datasetId = datasetId; - this.key = referenceMemoryUsage != null ? referenceMemoryUsage.getKey() : null; - if (key == null) { - this.key = latestMemoryUsage.getKey() != null ? 
latestMemoryUsage.getKey() : null; - } this.referenceMemoryUsage = referenceMemoryUsage; this.latestMemoryUsage = latestMemoryUsage; + if (referenceMemoryUsage != null) { + this.referenceUsedMemoryByKey = + referenceMemoryUsage.stream() + .collect(Collectors.toUnmodifiableMap(MemoryUsage::getKey, MemoryUsage::usedMemory)); + } + if (latestMemoryUsage != null) { + this.latestUsedMemoryByKey = + latestMemoryUsage.stream() + .collect(Collectors.toUnmodifiableMap(MemoryUsage::getKey, MemoryUsage::usedMemory)); + } } public String getDatasetId() { @@ -28,23 +42,27 @@ public void setDatasetId(String datasetId) { this.datasetId = datasetId; } - public MemoryUsage getReferenceMemoryUsage() { + public List getReferenceMemoryUsage() { return referenceMemoryUsage; } - public void setReferenceMemoryUsage(MemoryUsage referenceMemoryUsage) { + public void setReferenceMemoryUsage(List referenceMemoryUsage) { this.referenceMemoryUsage = referenceMemoryUsage; } - public MemoryUsage getLatestMemoryUsage() { + public List getLatestMemoryUsage() { return latestMemoryUsage; } - public void setLatestMemoryUsage(MemoryUsage latestMemoryUsage) { + public void setLatestMemoryUsage(List latestMemoryUsage) { this.latestMemoryUsage = latestMemoryUsage; } - public String getKey() { - return key; + public Map getReferenceUsedMemoryByKey() { + return referenceUsedMemoryByKey; + } + + public Map getLatestUsedMemoryByKey() { + return latestUsedMemoryByKey; } } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java deleted file mode 100644 index 1145d46b31..0000000000 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparator.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.mobilitydata.gtfsvalidator.outputcomparator.io; - -import 
java.util.Comparator; - -/** A comparator for MemoryUsage objects that compares them based on the used memory. */ -public class MemoryUsageUsedMemoryComparator implements Comparator { - - @Override - public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { - if (o1 == o2) { - return 0; - } - if (o1 == null || o2 == null) { - return o1 == null ? -1 : 1; - } - if (o1.getReferenceMemoryUsage() == null && o2.getLatestMemoryUsage() == null) { - return 0; - } - if (o1.getReferenceMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { - return o1.getReferenceMemoryUsage() == null ? -1 : 1; - } - if (o1.getReferenceMemoryUsage().usedMemory() < o2.getLatestMemoryUsage().usedMemory()) { - return -1; - } - if (o1.getReferenceMemoryUsage().usedMemory() > o2.getLatestMemoryUsage().usedMemory()) { - return 1; - } - return 0; - } -} diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java new file mode 100644 index 0000000000..eb3c319df6 --- /dev/null +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java @@ -0,0 +1,57 @@ +package org.mobilitydata.gtfsvalidator.outputcomparator.io; + +import java.util.Comparator; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Comparator to compare two {@link DatasetMemoryUsage} objects based on the difference between the + * used memory of the two objects. The difference is calculated by comparing the used memory of the + * two objects for each key present in both objects. If a key is present in one object but not in + * the other, the key is ignored. This comparator is used to sort DatasetMemoryUsage by the + * minimum difference between the used memory of the two. 
This means the order is by the dataset + * validation that increased the memory. + */ +public class UsedMemoryIncreasedComparator implements Comparator { + + @Override + public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { + if (o1 == o2) { + return 0; + } + if (o1 == null || o2 == null) { + return o1 == null ? -1 : 1; + } + if (o1.getReferenceMemoryUsage() == null && o2.getLatestMemoryUsage() == null) { + return 0; + } + if (o1.getReferenceMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { + return o1.getReferenceMemoryUsage() == null ? -1 : 1; + } + + long o1MinDiff = + getMinimumDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey()); + long o2MinDiff = + getMinimumDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey()); + return Long.compare(o1MinDiff, o2MinDiff); + } + + private long getMinimumDifferenceByKey( + Map referenceMemoryUsage, Map latestMemoryUsage) { + Set keys = new HashSet<>(); + keys.addAll(latestMemoryUsage.keySet()); + keys.addAll(referenceMemoryUsage.keySet()); + return keys.stream() + .filter(key -> latestMemoryUsage.containsKey(key) && referenceMemoryUsage.containsKey(key)) + .filter(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key) != 0) + .mapToLong(key -> referenceMemoryUsage.get(key) - latestMemoryUsage.get(key)) + .min() + .orElse(Long.MAX_VALUE); + } + + @Override + public Comparator reversed() { + return Comparator.super.reversed(); + } +} diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index 74319c2cd0..2bf27795c4 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ 
b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -1,26 +1,32 @@ package org.mobilitydata.gtfsvalidator.outputcomparator.io; import java.util.*; -import java.util.stream.Collectors; +import java.util.concurrent.atomic.AtomicBoolean; import org.mobilitydata.gtfsvalidator.model.ValidationReport; import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.ValidationPerformance; import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; public class ValidationPerformanceCollector { - private static final int MEMORY_USAGE_COMPARE_MAX = 20; + public static final int MEMORY_USAGE_COMPARE_MAX = 25; private final Map referenceTimes; private final Map latestTimes; - private final Map> - largestFirstMemoryUsageBySourceId; - private final Map> - smallestFirstMemoryMapBySourceId; + private final BoundedPriorityQueue datasetsDecreasedMemoryUsage; + private final BoundedPriorityQueue datasetsIncreasedMemoryUsage; public ValidationPerformanceCollector() { this.referenceTimes = new HashMap<>(); this.latestTimes = new HashMap<>(); - this.largestFirstMemoryUsageBySourceId = new HashMap<>(); - this.smallestFirstMemoryMapBySourceId = new HashMap<>(); + this.datasetsDecreasedMemoryUsage = + new BoundedPriorityQueue<>( + MEMORY_USAGE_COMPARE_MAX, + MEMORY_USAGE_COMPARE_MAX, + (new UsedMemoryIncreasedComparator().reversed())); + this.datasetsIncreasedMemoryUsage = + new BoundedPriorityQueue<>( + MEMORY_USAGE_COMPARE_MAX, + MEMORY_USAGE_COMPARE_MAX, + new UsedMemoryIncreasedComparator()); } public void addReferenceTime(String sourceId, Double time) { @@ -201,68 +207,60 @@ public String generateLogString() { .append("\n\n"); } - if (smallestFirstMemoryMapBySourceId.size() > 0 - || largestFirstMemoryUsageBySourceId.size() > 0) { - b.append("📜 Memory Consumption\n"); - addMemoryUsageReport(smallestFirstMemoryMapBySourceId, "decreased", b); - addMemoryUsageReport(largestFirstMemoryUsageBySourceId, 
"increased", b); - } - b.append("\n\n"); + if (datasetsIncreasedMemoryUsage.size() > 0 || datasetsDecreasedMemoryUsage.size() > 0) { + b.append("
\n"); + b.append("📜 Memory Consumption\n"); + addMemoryUsageReport(datasetsIncreasedMemoryUsage, "increased", b); + addMemoryUsageReport(datasetsDecreasedMemoryUsage, "decreased", b); + b.append("
\n"); + } return b.toString(); } private void addMemoryUsageReport( - Map> queueMap, - String order, - StringBuilder b) { + BoundedPriorityQueue queue, String order, StringBuilder b) { b.append( String.format( - "

List of %s datasets where memory has %s .

\n", - MEMORY_USAGE_COMPARE_MAX, order)) + "

List of %s datasets where memory has %s.

", MEMORY_USAGE_COMPARE_MAX, order)) .append("\n") .append( - "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n") + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n") .append( "|-----------------------------|-------------------|----------------|----------------|----------------|\n"); - - queueMap.keySet().stream() + List sortedList = new ArrayList<>(queue); + sortedList.sort(queue.comparator()); + sortedList.stream() .forEachOrdered( - sourceId -> { - var pq = queueMap.get(sourceId); - List datasetMemoryUsages = - Arrays.asList(pq.toArray(new DatasetMemoryUsage[pq.size()])); - Collections.sort(datasetMemoryUsages, pq.comparator()); - generateMemoryLogByKey(datasetMemoryUsages, b); + datasetMemoryUsage -> { + generateMemoryLogByKey(datasetMemoryUsage, b); }); } private static void generateMemoryLogByKey( - List memoryIncreases, StringBuilder b) { - memoryIncreases.stream() - .forEachOrdered( - item -> { - String usedMemoryDiff = - getMemoryDiff( - item.getReferenceMemoryUsage() != null - ? item.getReferenceMemoryUsage().usedMemory() - : null, - item.getLatestMemoryUsage() != null - ? item.getLatestMemoryUsage().usedMemory() - : null); - b.append( - String.format( - "| %s | %s | %s | %s | %s |\n", - item.getKey(), - item.getDatasetId(), - item.getReferenceMemoryUsage() != null - ? item.getReferenceMemoryUsage().usedMemory() - : "-", - item.getLatestMemoryUsage() != null - ? 
item.getLatestMemoryUsage().usedMemory() - : "-", - usedMemoryDiff)); + DatasetMemoryUsage datasetMemoryUsage, StringBuilder b) { + AtomicBoolean isFirst = new AtomicBoolean(true); + Set keys = new HashSet<>(); + keys.addAll(datasetMemoryUsage.getReferenceUsedMemoryByKey().keySet()); + keys.addAll(datasetMemoryUsage.getLatestUsedMemoryByKey().keySet()); + keys.stream() + .forEach( + key -> { + var reference = datasetMemoryUsage.getReferenceUsedMemoryByKey().get(key); + var latest = datasetMemoryUsage.getLatestUsedMemoryByKey().get(key); + if (reference != null && latest != null) { + String usedMemoryDiff = getMemoryDiff(reference, latest); + if (isFirst.get()) { + b.append( + String.format("| %s | | | | |\n", datasetMemoryUsage.getDatasetId())); + isFirst.set(false); + } + b.append( + String.format( + "| | %s | %s | %s | %s |\n", key, reference, latest, usedMemoryDiff)); + isFirst.set(false); + } }); } @@ -280,54 +278,20 @@ public void compareValidationReports( private void compareValidationReportMemoryUsage( String sourceId, ValidationReport referenceReport, ValidationReport latestReport) { - Set keys = - referenceReport.getMemoryUsageRecords() != null - ? 
referenceReport.getMemoryUsageRecords().stream() - .map(MemoryUsage::getKey) - .collect(Collectors.toSet()) - : new HashSet<>(); - if (latestReport.getMemoryUsageRecords() != null) { - keys.addAll( - latestReport.getMemoryUsageRecords().stream() - .map(MemoryUsage::getKey) - .collect(Collectors.toSet())); + DatasetMemoryUsage datasetMemoryUsage = + new DatasetMemoryUsage( + sourceId, + referenceReport.getMemoryUsageRecords(), + latestReport.getMemoryUsageRecords()); + if (referenceReport.getMemoryUsageRecords() != null + && referenceReport.getMemoryUsageRecords().size() > 0 + && latestReport.getMemoryUsageRecords() != null + && latestReport.getMemoryUsageRecords().size() > 0) { + datasetsIncreasedMemoryUsage.offer(datasetMemoryUsage); + datasetsDecreasedMemoryUsage.offer(datasetMemoryUsage); + } else { + // add to not found keys } - Map referenceMap = - referenceReport.getMemoryUsageRecords() != null - ? referenceReport.getMemoryUsageRecords().stream() - .collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage)) - : new HashMap<>(); - Map latestMap = - latestReport.getMemoryUsageRecords() != null - ? 
latestReport.getMemoryUsageRecords().stream() - .collect(Collectors.toMap(MemoryUsage::getKey, memoryUsage -> memoryUsage)) - : new HashMap<>(); - keys.stream() - .forEachOrdered( - key -> { - var datasetMemoryUsage = - new DatasetMemoryUsage(sourceId, referenceMap.get(key), latestMap.get(key)); - BoundedPriorityQueue decreasingQueue = - largestFirstMemoryUsageBySourceId.get(sourceId); - BoundedPriorityQueue increasingQueue = - smallestFirstMemoryMapBySourceId.get(sourceId); - if (decreasingQueue == null) { - decreasingQueue = - new BoundedPriorityQueue<>( - MEMORY_USAGE_COMPARE_MAX, - 2, - (new MemoryUsageUsedMemoryComparator()).reversed()); - largestFirstMemoryUsageBySourceId.put(sourceId, decreasingQueue); - increasingQueue = - new BoundedPriorityQueue<>( - MEMORY_USAGE_COMPARE_MAX, 2, new MemoryUsageUsedMemoryComparator()); - smallestFirstMemoryMapBySourceId.put(sourceId, increasingQueue); - } - if (referenceMap.containsKey(key) || latestMap.containsKey(key)) { - increasingQueue.offer(datasetMemoryUsage); - decreasingQueue.offer(datasetMemoryUsage); - } - }); } public List toReport() { diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java new file mode 100644 index 0000000000..b09e55387d --- /dev/null +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java @@ -0,0 +1,60 @@ +package org.mobilitydata.gtfsvalidator.outputcomparator.io; + +import static org.junit.Assert.assertEquals; + +import java.util.*; +import org.junit.Before; +import org.junit.Test; +import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; + +public class MemoryUsageUsedMemoryComparatorTest { + + private UsedMemoryIncreasedComparator comparator; + + @Before + public void setUp() { + comparator = new 
UsedMemoryIncreasedComparator(); + } + + @Test + public void testCompare_equalMemoryUsage() { + List referenceMemoryUsage = getMemoryUsage(100L); + List latestMemoryUsage = getMemoryUsage(100L); + DatasetMemoryUsage o1 = + new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage); + DatasetMemoryUsage o2 = + new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage); + assertEquals(0, comparator.compare(o1, o2)); + } + + @Test + public void testCompare_firstHasMoreMemoryDifference() { + List referenceMemoryUsage = getMemoryUsage(100L); + List latestMemoryUsage = getMemoryUsage(50L); + DatasetMemoryUsage o1 = + new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage); + DatasetMemoryUsage o2 = + new DatasetMemoryUsage("dataset1", referenceMemoryUsage, getMemoryUsage(100L)); + assertEquals(-1, comparator.compare(o1, o2)); + } + + @Test + public void testCompare_firstHasLessMemoryDifference() { + List referenceMemoryUsage = getMemoryUsage(100L); + List latestMemoryUsage = getMemoryUsage(50L); + DatasetMemoryUsage o1 = + new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage); + DatasetMemoryUsage o2 = + new DatasetMemoryUsage("dataset1", referenceMemoryUsage, getMemoryUsage(10L)); + assertEquals(1, comparator.compare(o1, o2)); + } + + private static List getMemoryUsage(long freeMemory) { + MemoryUsage[] referenceMemoryUsage = + new MemoryUsage[] { + new MemoryUsage("key1", 100L, freeMemory, 100L, 100L), + new MemoryUsage("key2", 100L, freeMemory, 100L, 100L), + }; + return Arrays.asList(referenceMemoryUsage); + } +} diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java index da33a6ccc5..6fc23cb62c 100644 --- 
a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java @@ -14,45 +14,58 @@ public class ValidationPerformanceCollectorTest { public void generateLogString_test() { ValidationPerformanceCollector collector = new ValidationPerformanceCollector(); + // Adding some sample data + collector.addReferenceTime("feed-id-a", 12.0); + collector.addReferenceTime("feed-id-a", 14.0); + collector.addLatestTime("feed-id-a", 16.0); + collector.addLatestTime("feed-id-a", 18.0); + + collector.addReferenceTime("feed-id-b", 20.0); + collector.addLatestTime("feed-id-b", 22.0); + // Adding some sample data long baseMemory = 1000000; // Memory usage latest null collector.compareValidationReports( - "feed-id-a", + "feed-id-m1", new ValidationReport( Collections.EMPTY_SET, - 12.0, + null, Arrays.asList( new MemoryUsage("key1", baseMemory, baseMemory, 200, 50L), new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))), new ValidationReport(Collections.EMPTY_SET, 16.0, Collections.EMPTY_LIST)); - // Memory usage decreased + // Memory usage increased as there is less free memory collector.compareValidationReports( - "feed-id-a", + "feed-id-m2", new ValidationReport( Collections.EMPTY_SET, - 14.0, + null, Arrays.asList( - new MemoryUsage("key3", baseMemory, baseMemory - 1000, 200, 50L), - new MemoryUsage("key4", baseMemory, baseMemory - 1000, 200, 50L))), + new MemoryUsage("key1", baseMemory, baseMemory, 200, 50L), + new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))), new ValidationReport( Collections.EMPTY_SET, - 18.0, + null, Arrays.asList( - new MemoryUsage("key3", baseMemory, baseMemory - baseMemory / 2, 200, null), - new MemoryUsage("key4", baseMemory, baseMemory - baseMemory / 2, 200, null)))); + new MemoryUsage("key1", baseMemory, baseMemory - baseMemory / 2, 200, null), + new 
MemoryUsage("key2", baseMemory, baseMemory - baseMemory / 2, 200, null)))); - // Memory usage decreased + // // Memory usage decreased as there is more free memory collector.compareValidationReports( - "feed-id-b", + "feed-id-m3", new ValidationReport( Collections.EMPTY_SET, - 20.0, + null, + Arrays.asList( + new MemoryUsage("key3", baseMemory, baseMemory + 100, 200, null), + new MemoryUsage("key4", baseMemory, baseMemory + 100, 200, null))), + new ValidationReport( + Collections.EMPTY_SET, + null, Arrays.asList( new MemoryUsage("key3", baseMemory, baseMemory * 2, 200, null), - new MemoryUsage("key4", baseMemory, baseMemory * 2, 200, null))), - new ValidationReport(Collections.EMPTY_SET, 22.0, Collections.EMPTY_LIST)); - + new MemoryUsage("key4", baseMemory, baseMemory * 2, 200, null)))); // Generating the log string String logString = collector.generateLogString(); String expectedLogString = @@ -71,26 +84,35 @@ public void generateLogString_test() { + "| Maximum in Reference Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n" + "| Minimum in Latest Reports | feed-id-a | 14.00 | 18.00 | ⬆\uFE0F+4.00 |\n" + "| Maximum in Latest Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n" + + "#### ⚠️ Warnings\n\n" + + "The following dataset IDs are missing validation times either in reference or latest:\n" + + "feed-id-m1\n\n" + + "\n\n" + + "
\n" + "📜 Memory Consumption\n" - + "

List of 20 datasets where memory has decreased .

\n\n" - + "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + + "

List of " + + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX + + " datasets where memory has increased.

\n" + + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" - + "| key1 | feed-id-a | 0 | - | N/A |\n" - + "| key2 | feed-id-a | 0 | - | N/A |\n" - + "| key4 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n" - + "| key3 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n" - + "| key3 | feed-id-b | -1000000 | - | N/A |\n" - + "| key4 | feed-id-b | -1000000 | - | N/A |\n" - + "

List of 20 datasets where memory has increased .

\n\n" - + "| Key(Used Memory) | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + + "| feed-id-m2 | | | | |\n" + + "| | key1 | 0 | 500000 | ⬆\uFE0F+488.28 KiB |\n" + + "| | key2 | 0 | 500000 | ⬆\uFE0F+488.28 KiB |\n" + + "| feed-id-m3 | | | | |\n" + + "| | key3 | -100 | -1000000 | ⬇\uFE0F-976.46 KiB |\n" + + "| | key4 | -100 | -1000000 | ⬇\uFE0F-976.46 KiB |\n" + + "

List of " + + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX + + " datasets where memory has decreased.

\n" + + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" - + "| key3 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n" - + "| key4 | feed-id-a | 1000 | 500000 | ⬆️+487.30 KiB |\n" - + "| key1 | feed-id-a | 0 | - | N/A |\n" - + "| key2 | feed-id-a | 0 | - | N/A |\n" - + "| key3 | feed-id-b | -1000000 | - | N/A |\n" - + "| key4 | feed-id-b | -1000000 | - | N/A |\n" - + "
\n\n"; + + "| feed-id-m3 | | | | |\n" + + "| | key3 | -100 | -1000000 | ⬇️-976.46 KiB |\n" + + "| | key4 | -100 | -1000000 | ⬇️-976.46 KiB |\n" + + "| feed-id-m2 | | | | |\n" + + "| | key1 | 0 | 500000 | ⬆️+488.28 KiB |\n" + + "| | key2 | 0 | 500000 | ⬆️+488.28 KiB |\n" + + "\n"; // Assert that the generated log string matches the expected log string assertThat(logString).isEqualTo(expectedLogString); } From 3efd975f174db0993bf0ec6c5b4bbf76c47487c2 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Wed, 2 Oct 2024 16:57:23 -0400 Subject: [PATCH 10/30] fix compilation issue --- .../org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java index af46959357..0fcaa738da 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsFeedLoader.java @@ -175,7 +175,7 @@ private static void loadTables( NoticeContainer noticeContainer, ExecutorService exec, List> loaderCallables, - ArrayList> tableContainers) + ArrayList> tableContainers) throws InterruptedException { for (Future futureContainer : exec.invokeAll(loaderCallables)) { try { From 39182dd85f3c080d4d1fa0d46d80391d2a544dd2 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 3 Oct 2024 10:16:00 -0400 Subject: [PATCH 11/30] add feeds with no reference --- .../io/UsedMemoryIncreasedComparator.java | 1 - .../io/ValidationPerformanceCollector.java | 62 +++++++++++++------ .../ValidationPerformanceCollectorTest.java | 30 +++++---- 3 files changed, 61 insertions(+), 32 deletions(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java 
b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java index eb3c319df6..9593d89b14 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java @@ -29,7 +29,6 @@ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { if (o1.getReferenceMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { return o1.getReferenceMemoryUsage() == null ? -1 : 1; } - long o1MinDiff = getMinimumDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey()); long o2MinDiff = diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index 2bf27795c4..752cfde317 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -13,6 +13,7 @@ public class ValidationPerformanceCollector { private final Map latestTimes; private final BoundedPriorityQueue datasetsDecreasedMemoryUsage; private final BoundedPriorityQueue datasetsIncreasedMemoryUsage; + private final List datasetsMemoryUsageNoReference; public ValidationPerformanceCollector() { this.referenceTimes = new HashMap<>(); @@ -27,6 +28,7 @@ public ValidationPerformanceCollector() { MEMORY_USAGE_COMPARE_MAX, MEMORY_USAGE_COMPARE_MAX, new UsedMemoryIncreasedComparator()); + this.datasetsMemoryUsageNoReference = new ArrayList<>(); } public void addReferenceTime(String sourceId, Double time) { @@ -212,26 +214,42 @@ public String generateLogString() { if 
(datasetsIncreasedMemoryUsage.size() > 0 || datasetsDecreasedMemoryUsage.size() > 0) { b.append("
\n"); b.append("📜 Memory Consumption\n"); - addMemoryUsageReport(datasetsIncreasedMemoryUsage, "increased", b); - addMemoryUsageReport(datasetsDecreasedMemoryUsage, "decreased", b); + + List increasedMemoryUsages = + getDatasetMemoryUsages(datasetsIncreasedMemoryUsage); + addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b); + List decreasedMemoryUsages = + getDatasetMemoryUsages(datasetsDecreasedMemoryUsage); + addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b); + datasetsMemoryUsageNoReference.sort( + new Comparator() { + @Override + public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { + return 0; + } + }); + addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b); b.append("
\n"); } return b.toString(); } + private List getDatasetMemoryUsages( + BoundedPriorityQueue datasetsMemoryUsage) { + List increasedMemoryUsages = new ArrayList<>(datasetsMemoryUsage); + increasedMemoryUsages.sort(datasetsMemoryUsage.comparator()); + return increasedMemoryUsages; + } + private void addMemoryUsageReport( - BoundedPriorityQueue queue, String order, StringBuilder b) { - b.append( - String.format( - "

List of %s datasets where memory has %s.

", MEMORY_USAGE_COMPARE_MAX, order)) + List memoryUsages, String order, StringBuilder b) { + b.append(String.format("

List of %s datasets(%s).

", MEMORY_USAGE_COMPARE_MAX, order)) .append("\n") .append( "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n") .append( "|-----------------------------|-------------------|----------------|----------------|----------------|\n"); - List sortedList = new ArrayList<>(queue); - sortedList.sort(queue.comparator()); - sortedList.stream() + memoryUsages.stream() .forEachOrdered( datasetMemoryUsage -> { generateMemoryLogByKey(datasetMemoryUsage, b); @@ -249,18 +267,22 @@ private static void generateMemoryLogByKey( key -> { var reference = datasetMemoryUsage.getReferenceUsedMemoryByKey().get(key); var latest = datasetMemoryUsage.getLatestUsedMemoryByKey().get(key); - if (reference != null && latest != null) { - String usedMemoryDiff = getMemoryDiff(reference, latest); - if (isFirst.get()) { - b.append( - String.format("| %s | | | | |\n", datasetMemoryUsage.getDatasetId())); - isFirst.set(false); - } - b.append( - String.format( - "| | %s | %s | %s | %s |\n", key, reference, latest, usedMemoryDiff)); + if (isFirst.get()) { + b.append(String.format("| %s | | | | |\n", datasetMemoryUsage.getDatasetId())); isFirst.set(false); } + // if (reference != null && latest != null) { + String usedMemoryDiff = getMemoryDiff(reference, latest); + b.append( + String.format( + "| | %s | %s | %s | %s |\n", + key, + reference != null + ? MemoryUsage.convertToHumanReadableMemory(reference) + : "N/A", + latest != null ? 
MemoryUsage.convertToHumanReadableMemory(latest) : "N/A", + usedMemoryDiff)); + // } }); } @@ -290,7 +312,7 @@ private void compareValidationReportMemoryUsage( datasetsIncreasedMemoryUsage.offer(datasetMemoryUsage); datasetsDecreasedMemoryUsage.offer(datasetMemoryUsage); } else { - // add to not found keys + datasetsMemoryUsageNoReference.add(datasetMemoryUsage); } } diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java index 6fc23cb62c..88803994fd 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java @@ -32,7 +32,7 @@ public void generateLogString_test() { Collections.EMPTY_SET, null, Arrays.asList( - new MemoryUsage("key1", baseMemory, baseMemory, 200, 50L), + new MemoryUsage("key1", baseMemory, baseMemory + baseMemory * 10, 200, 50L), new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))), new ValidationReport(Collections.EMPTY_SET, 16.0, Collections.EMPTY_LIST)); // Memory usage increased as there is less free memory @@ -92,26 +92,34 @@ public void generateLogString_test() { + "📜 Memory Consumption\n" + "

List of " + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX - + " datasets where memory has increased.

\n" + + " datasets(memory has increased).

\n" + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + "| feed-id-m2 | | | | |\n" - + "| | key1 | 0 | 500000 | ⬆\uFE0F+488.28 KiB |\n" - + "| | key2 | 0 | 500000 | ⬆\uFE0F+488.28 KiB |\n" + + "| | key1 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + + "| | key2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + "| feed-id-m3 | | | | |\n" - + "| | key3 | -100 | -1000000 | ⬇\uFE0F-976.46 KiB |\n" - + "| | key4 | -100 | -1000000 | ⬇\uFE0F-976.46 KiB |\n" + + "| | key3 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n" + + "| | key4 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n" + "

List of " + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX - + " datasets where memory has decreased.

\n" + + " datasets(memory has decreased).

\n" + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + "| feed-id-m3 | | | | |\n" - + "| | key3 | -100 | -1000000 | ⬇️-976.46 KiB |\n" - + "| | key4 | -100 | -1000000 | ⬇️-976.46 KiB |\n" + + "| | key3 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n" + + "| | key4 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n" + "| feed-id-m2 | | | | |\n" - + "| | key1 | 0 | 500000 | ⬆️+488.28 KiB |\n" - + "| | key2 | 0 | 500000 | ⬆️+488.28 KiB |\n" + + "| | key1 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + + "| | key2 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + + "

List of " + + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX + + " datasets(no reference available).

\n" + + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n" + + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + + "| feed-id-m1 | | | | |\n" + + "| | key1 | -9.54 MiB | N/A | N/A |\n" + + "| | key2 | 0 bytes | N/A | N/A |\n" + "\n"; // Assert that the generated log string matches the expected log string assertThat(logString).isEqualTo(expectedLogString); From 06fe749fda091729ea929a52327261c7aadbfaed Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 3 Oct 2024 10:58:29 -0400 Subject: [PATCH 12/30] add no references to the report --- .../io/ValidationPerformanceCollector.java | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index 752cfde317..79f8d9effb 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -211,26 +211,22 @@ public String generateLogString() { b.append("\n\n"); - if (datasetsIncreasedMemoryUsage.size() > 0 || datasetsDecreasedMemoryUsage.size() > 0) { - b.append("
\n"); - b.append("📜 Memory Consumption\n"); - + b.append("
\n"); + b.append("📜 Memory Consumption\n"); + if (datasetsIncreasedMemoryUsage.size() > 0) { List increasedMemoryUsages = getDatasetMemoryUsages(datasetsIncreasedMemoryUsage); addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b); + } + if (datasetsDecreasedMemoryUsage.size() > 0) { List decreasedMemoryUsages = getDatasetMemoryUsages(datasetsDecreasedMemoryUsage); addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b); - datasetsMemoryUsageNoReference.sort( - new Comparator() { - @Override - public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { - return 0; - } - }); + } + if (datasetsMemoryUsageNoReference.size() > 0) { addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b); - b.append("
\n"); } + b.append("
\n"); return b.toString(); } From a70accf1af59b92a419b6797e8f4c62dfe53136e Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 3 Oct 2024 11:17:21 -0400 Subject: [PATCH 13/30] fix failing tests --- .../io/ValidationPerformanceCollector.java | 34 +++++++++++-------- .../cli/ValidationReportComparatorTest.java | 8 ++++- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index 79f8d9effb..ad7edd1f52 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -211,22 +211,26 @@ public String generateLogString() { b.append("\n\n"); - b.append("
\n"); - b.append("📜 Memory Consumption\n"); - if (datasetsIncreasedMemoryUsage.size() > 0) { - List increasedMemoryUsages = - getDatasetMemoryUsages(datasetsIncreasedMemoryUsage); - addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b); - } - if (datasetsDecreasedMemoryUsage.size() > 0) { - List decreasedMemoryUsages = - getDatasetMemoryUsages(datasetsDecreasedMemoryUsage); - addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b); - } - if (datasetsMemoryUsageNoReference.size() > 0) { - addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b); + if (datasetsIncreasedMemoryUsage.size() > 0 + || datasetsDecreasedMemoryUsage.size() > 0 + || datasetsMemoryUsageNoReference.size() > 0) { + b.append("
\n"); + b.append("📜 Memory Consumption\n"); + if (datasetsIncreasedMemoryUsage.size() > 0) { + List increasedMemoryUsages = + getDatasetMemoryUsages(datasetsIncreasedMemoryUsage); + addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b); + } + if (datasetsDecreasedMemoryUsage.size() > 0) { + List decreasedMemoryUsages = + getDatasetMemoryUsages(datasetsDecreasedMemoryUsage); + addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b); + } + if (datasetsMemoryUsageNoReference.size() > 0) { + addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b); + } + b.append("
\n"); } - b.append("
\n"); return b.toString(); } diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java index 18e8d66e56..c1a37f86cc 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java @@ -141,7 +141,13 @@ public void addedErrorNotice_summaryString() throws Exception { + "\n" + "| Time Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" - + "\n\n\n"); + + "\n\n" + + "
\n" + + "📜 Memory Consumption\n" + + "

List of 25 datasets(no reference available).

\n" + + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n" + + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + + "
\n\n"); } @Test From b10320dca505b5fe5bfbc632d8bb4749e4c1336a Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:40:53 -0400 Subject: [PATCH 14/30] fix memory table formatting --- .../performance/MemoryMonitor.java | 4 ++ .../performance/MemoryMonitorAspect.java | 8 +++- .../performance/MemoryUsage.java | 17 +++++++ .../io/ValidationPerformanceCollector.java | 48 ++++++++++++------- .../cli/ValidationReportComparatorTest.java | 6 +-- .../ValidationPerformanceCollectorTest.java | 20 ++++---- 6 files changed, 73 insertions(+), 30 deletions(-) diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java index 179ce3cd78..cd05c2a509 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitor.java @@ -5,6 +5,10 @@ import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; +/** + * Annotation to monitor memory usage of a method. The annotated method should return a {@link + * MemoryUsage} object. The key is used to group memory usage of different methods. + */ @Target(ElementType.METHOD) @Retention(RetentionPolicy.RUNTIME) public @interface MemoryMonitor { diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java index f6baf95945..3c2fdef403 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryMonitorAspect.java @@ -6,10 +6,10 @@ import org.aspectj.lang.annotation.Aspect; import org.aspectj.lang.reflect.MethodSignature; +/** Aspect to monitor memory usage of a method. 
*/ @Aspect public class MemoryMonitorAspect { - // @Around("@annotation(MemoryMonitor)") @Around("execution(@org.mobilitydata.gtfsvalidator.performance.MemoryMonitor * *(..))") public Object monitorMemoryUsage(ProceedingJoinPoint joinPoint) throws Throwable { String key = extractKey(joinPoint); @@ -23,6 +23,12 @@ public Object monitorMemoryUsage(ProceedingJoinPoint joinPoint) throws Throwable } } + /** + * Extracts the key from the method signature or the annotation. + * + * @param joinPoint the join point + * @return the key either from the annotation or the method signature. + */ private String extractKey(ProceedingJoinPoint joinPoint) { var method = ((MethodSignature) joinPoint.getSignature()).getMethod(); var memoryMonitor = method.getAnnotation(MemoryMonitor.class); diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java index f0126c23e9..f81321afb3 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsage.java @@ -3,6 +3,7 @@ import java.text.DecimalFormat; import org.apache.commons.lang3.StringUtils; +/** Represents memory usage information. */ public class MemoryUsage { private static final DecimalFormat TWO_DECIMAL_FORMAT = new DecimalFormat("0.00"); @@ -23,6 +24,12 @@ public MemoryUsage( this.diffMemory = diffMemory; } + /** + * Converts bytes to human-readable memory. + * + * @param bytes + * @return human-readable memory, e.g., "1.23 GiB" + */ public static String convertToHumanReadableMemory(Long bytes) { if (bytes == null) { return "N/A"; @@ -43,10 +50,20 @@ public static String convertToHumanReadableMemory(Long bytes) { return TWO_DECIMAL_FORMAT.format(Math.copySign(size / 1099511627776L, bytes)) + " TiB"; } + /** + * The memory used is computed as the difference between the total memory and the free memory. 
+ * + * @return the memory used. + */ public long usedMemory() { return totalMemory - freeMemory; } + /** + * Returns a human-readable string representation of the memory usage. + * + * @return a human-readable string representation of the memory usage. + */ public String humanReadablePrint() { StringBuffer result = new StringBuffer(); result.append("Memory usage registered"); diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index ad7edd1f52..746d8f53d6 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -219,15 +219,15 @@ public String generateLogString() { if (datasetsIncreasedMemoryUsage.size() > 0) { List increasedMemoryUsages = getDatasetMemoryUsages(datasetsIncreasedMemoryUsage); - addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b); + addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b, true); } if (datasetsDecreasedMemoryUsage.size() > 0) { List decreasedMemoryUsages = getDatasetMemoryUsages(datasetsDecreasedMemoryUsage); - addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b); + addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b, true); } if (datasetsMemoryUsageNoReference.size() > 0) { - addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b); + addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b, false); } b.append("\n"); } @@ -242,22 +242,33 @@ private List getDatasetMemoryUsages( } private void addMemoryUsageReport( - List memoryUsages, String order, StringBuilder b) { + List memoryUsages, + String order, + StringBuilder 
b, + boolean includeDifference) { b.append(String.format("

List of %s datasets(%s).

", MEMORY_USAGE_COMPARE_MAX, order)) - .append("\n") - .append( - "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n") + .append("\n\n") .append( - "|-----------------------------|-------------------|----------------|----------------|----------------|\n"); + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |"); + if (includeDifference) { + b.append(" Difference |"); + } + b.append("\n"); + b.append( + "|-----------------------------|-------------------|----------------|----------------|"); + if (includeDifference) { + b.append("----------------|"); + } + b.append("\n"); memoryUsages.stream() .forEachOrdered( datasetMemoryUsage -> { - generateMemoryLogByKey(datasetMemoryUsage, b); + generateMemoryLogByKey(datasetMemoryUsage, b, includeDifference); }); } private static void generateMemoryLogByKey( - DatasetMemoryUsage datasetMemoryUsage, StringBuilder b) { + DatasetMemoryUsage datasetMemoryUsage, StringBuilder b, boolean includeDifference) { AtomicBoolean isFirst = new AtomicBoolean(true); Set keys = new HashSet<>(); keys.addAll(datasetMemoryUsage.getReferenceUsedMemoryByKey().keySet()); @@ -268,21 +279,26 @@ private static void generateMemoryLogByKey( var reference = datasetMemoryUsage.getReferenceUsedMemoryByKey().get(key); var latest = datasetMemoryUsage.getLatestUsedMemoryByKey().get(key); if (isFirst.get()) { - b.append(String.format("| %s | | | | |\n", datasetMemoryUsage.getDatasetId())); + b.append(String.format("| %s | | | |", datasetMemoryUsage.getDatasetId())); + if (includeDifference) { + b.append(" |"); + } + b.append("\n"); isFirst.set(false); } - // if (reference != null && latest != null) { String usedMemoryDiff = getMemoryDiff(reference, latest); b.append( String.format( - "| | %s | %s | %s | %s |\n", + "| | %s | %s | %s |", key, reference != null ? MemoryUsage.convertToHumanReadableMemory(reference) : "N/A", - latest != null ? 
MemoryUsage.convertToHumanReadableMemory(latest) : "N/A", - usedMemoryDiff)); - // } + latest != null ? MemoryUsage.convertToHumanReadableMemory(latest) : "N/A")); + if (includeDifference) { + b.append(String.format(" %s |", usedMemoryDiff)); + } + b.append("\n"); }); } diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java index c1a37f86cc..6cf2ac3a7d 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java @@ -144,9 +144,9 @@ public void addedErrorNotice_summaryString() throws Exception { + "\n\n" + "
\n" + "📜 Memory Consumption\n" - + "

List of 25 datasets(no reference available).

\n" - + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n" - + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + + "

List of 25 datasets(no reference available).

\n\n" + + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n" + + "|-----------------------------|-------------------|----------------|----------------|\n" + "
\n\n"); } diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java index 88803994fd..82c204b20c 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java @@ -92,8 +92,8 @@ public void generateLogString_test() { + "📜 Memory Consumption\n" + "

List of " + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX - + " datasets(memory has increased).

\n" - + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n" + + " datasets(memory has increased).

\n\n" + + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest | Difference |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + "| feed-id-m2 | | | | |\n" + "| | key1 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" @@ -103,8 +103,8 @@ public void generateLogString_test() { + "| | key4 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n" + "

List of " + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX - + " datasets(memory has decreased).

\n" - + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n" + + " datasets(memory has decreased).

\n\n" + + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest | Difference |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + "| feed-id-m3 | | | | |\n" + "| | key3 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n" @@ -114,12 +114,12 @@ public void generateLogString_test() { + "| | key2 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + "

List of " + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX - + " datasets(no reference available).

\n" - + "| Dataset ID | Snapshot Key(Used Memory) | Reference (s) | Latest (s) | Difference (s) |\n" - + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" - + "| feed-id-m1 | | | | |\n" - + "| | key1 | -9.54 MiB | N/A | N/A |\n" - + "| | key2 | 0 bytes | N/A | N/A |\n" + + " datasets(no reference available).

\n\n" + + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n" + + "|-----------------------------|-------------------|----------------|----------------|\n" + + "| feed-id-m1 | | | |\n" + + "| | key1 | -9.54 MiB | N/A |\n" + + "| | key2 | 0 bytes | N/A |\n" + "\n"; // Assert that the generated log string matches the expected log string assertThat(logString).isEqualTo(expectedLogString); From 97678eeaa20b80bfa9f8ffc43935fd821fe9129b Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 3 Oct 2024 15:49:50 -0400 Subject: [PATCH 15/30] sort feeds on the no reference list and limit them to 25 maximum items --- .../performance/MemoryUsageRegister.java | 34 +++++++++++++++ .../io/DatasetMemoryUsage.java | 4 ++ ...stReportUsedMemoryIncreasedComparator.java | 43 +++++++++++++++++++ .../io/UsedMemoryIncreasedComparator.java | 15 ++++--- .../io/ValidationPerformanceCollector.java | 8 +++- 5 files changed, 96 insertions(+), 8 deletions(-) create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java index e01a71948c..21dcb2d658 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/performance/MemoryUsageRegister.java @@ -5,6 +5,7 @@ import java.util.Collections; import java.util.List; +/** Register for memory usage snapshots. */ public class MemoryUsageRegister { private static final FluentLogger logger = FluentLogger.forEnclosingClass(); @@ -16,14 +17,27 @@ private MemoryUsageRegister() { runtime = Runtime.getRuntime(); } + /** @return the singleton instance of the memory usage register. 
*/ public static MemoryUsageRegister getInstance() { return instance; } + /** + * Returns the memory usage registry. + * + * @return the memory usage registry unmodifiable list. + */ public List getRegistry() { return Collections.unmodifiableList(registry); } + /** + * Returns a memory usage snapshot. + * + * @param key + * @param previous + * @return + */ public MemoryUsage getMemoryUsageSnapshot(String key, MemoryUsage previous) { Long memoryDiff = null; if (previous != null) { @@ -33,23 +47,43 @@ public MemoryUsage getMemoryUsageSnapshot(String key, MemoryUsage previous) { key, runtime.totalMemory(), runtime.freeMemory(), runtime.maxMemory(), memoryDiff); } + /** + * Registers a memory usage snapshot. + * + * @param key + * @return + */ public MemoryUsage registerMemoryUsage(String key) { MemoryUsage memoryUsage = getMemoryUsageSnapshot(key, null); registerMemoryUsage(memoryUsage); return memoryUsage; } + /** + * Registers a memory usage snapshot. + * + * @param key + * @param previous previous memory usage snapshot used to compute the memory difference between + * two snapshots. + * @return + */ public MemoryUsage registerMemoryUsage(String key, MemoryUsage previous) { MemoryUsage memoryUsage = getMemoryUsageSnapshot(key, previous); registerMemoryUsage(memoryUsage); return memoryUsage; } + /** + * Registers a memory usage snapshot. + * + * @param memoryUsage + */ public void registerMemoryUsage(MemoryUsage memoryUsage) { registry.add(memoryUsage); logger.atInfo().log(memoryUsage.humanReadablePrint()); } + /** Clears the memory usage registry. 
*/ public void clearRegistry() { registry.clear(); } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java index 452b7763ac..ab4c056ffa 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java @@ -7,6 +7,10 @@ import java.util.stream.Collectors; import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; +/** + * Represents memory usage information for a dataset. This class contains the information associated + * with the memory usage of a dataset when running the validation process. + */ public class DatasetMemoryUsage { private String datasetId; diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java new file mode 100644 index 0000000000..0392da39cb --- /dev/null +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java @@ -0,0 +1,43 @@ +package org.mobilitydata.gtfsvalidator.outputcomparator.io; + +import java.util.Comparator; +import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; + +/** + * Comparator to compare two {@link DatasetMemoryUsage} objects based on the difference between the + * used memory of the two objects based on the {@link DatasetMemoryUsage#getLatestMemoryUsage}. 
+ */ +public class LatestReportUsedMemoryIncreasedComparator implements Comparator { + + @Override + public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { + if (o1 == o2) { + return 0; + } + if (o1 == null || o2 == null) { + return o1 == null ? -1 : 1; + } + if (o1.getLatestMemoryUsage() == null && o2.getLatestMemoryUsage() == null) { + return 0; + } + if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { + return o1.getLatestMemoryUsage() == null ? -1 : 1; + } + long o1MinDiff = + o1.getLatestMemoryUsage().stream() + .min(Comparator.comparingLong(MemoryUsage::usedMemory)) + .get() + .usedMemory(); + long o2MinDiff = + o2.getLatestMemoryUsage().stream() + .min(Comparator.comparingLong(MemoryUsage::usedMemory)) + .get() + .usedMemory(); + return Long.compare(o1MinDiff, o2MinDiff); + } + + @Override + public Comparator reversed() { + return Comparator.super.reversed(); + } +} diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java index 9593d89b14..9b37b6bd8e 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java @@ -23,12 +23,18 @@ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { if (o1 == null || o2 == null) { return o1 == null ? 
-1 : 1; } - if (o1.getReferenceMemoryUsage() == null && o2.getLatestMemoryUsage() == null) { + if (o1.getReferenceMemoryUsage() == null + && o1.getLatestMemoryUsage() == null + && o2.getReferenceMemoryUsage() == null + && o2.getLatestMemoryUsage() == null) { return 0; } - if (o1.getReferenceMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { + if (o1.getReferenceMemoryUsage() == null || o2.getReferenceMemoryUsage() == null) { return o1.getReferenceMemoryUsage() == null ? -1 : 1; } + if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { + return o1.getLatestMemoryUsage() == null ? -1 : 1; + } long o1MinDiff = getMinimumDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey()); long o2MinDiff = @@ -48,9 +54,4 @@ private long getMinimumDifferenceByKey( .min() .orElse(Long.MAX_VALUE); } - - @Override - public Comparator reversed() { - return Comparator.super.reversed(); - } } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index 746d8f53d6..0416e0ebbf 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -227,7 +227,13 @@ public String generateLogString() { addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b, true); } if (datasetsMemoryUsageNoReference.size() > 0) { - addMemoryUsageReport(datasetsMemoryUsageNoReference, "no reference available", b, false); + datasetsMemoryUsageNoReference.sort(new LatestReportUsedMemoryIncreasedComparator()); + addMemoryUsageReport( + datasetsMemoryUsageNoReference.subList( + 0, Math.min(datasetsMemoryUsageNoReference.size(), MEMORY_USAGE_COMPARE_MAX)), + "no 
reference available", + b, + false); } b.append("\n"); } From 94b90776a9039d156d0ac60de5d1e386d9b5be37 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:12:25 -0400 Subject: [PATCH 16/30] add documentation and sort memory usage for feed with no reference --- docs/ACCEPTANCE_TESTS.md | 16 ++++++++++++++++ ...ava => LatestReportUsedMemoryComparator.java} | 16 ++++++++-------- .../io/ValidationPerformanceCollector.java | 2 +- 3 files changed, 25 insertions(+), 9 deletions(-) rename output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/{LatestReportUsedMemoryIncreasedComparator.java => LatestReportUsedMemoryComparator.java} (68%) diff --git a/docs/ACCEPTANCE_TESTS.md b/docs/ACCEPTANCE_TESTS.md index b17d036462..d0d2e1ecc0 100644 --- a/docs/ACCEPTANCE_TESTS.md +++ b/docs/ACCEPTANCE_TESTS.md @@ -104,6 +104,22 @@ We follow this process: +## Performance metrics within the acceptance reports + +There are two man metrics added to the acceptance report comment at the PR level, _Validation Time_ and _Memory Consumption_. +The performance metrics are **not a blocker** as performance might vary due to external factors including GitHub infrastructure performance. +However, large jumps in performance values should be investigated before approving a PR. + +### Validation Time +The validation time consists in general metrics like average, median, standard deviation, minimums and maximums. +This metrics can be affected by addition of new validators than introduce a penalty in processing time. + +### Memory Consumption +The memory consumption section contains three tables. +- The first, list the first 25 datasets that the difference increased memory comparing with the main branch. +- The second, list the first 25 datasets that the difference decreased memory comparing with the main branch. 
+- The third, list(not always visible) the first 25 datasets that were not available for comparison as the main branch didn't contain the memory usage information. + ## Instructions to run the pipeline 1. Provide code changes by creating a new PR on the [GitHub repository](https://github.com/MobilityData/gtfs-validator); diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryComparator.java similarity index 68% rename from output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java rename to output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryComparator.java index 0392da39cb..89ea4c699a 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryIncreasedComparator.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/LatestReportUsedMemoryComparator.java @@ -4,10 +4,10 @@ import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; /** - * Comparator to compare two {@link DatasetMemoryUsage} objects based on the difference between the - * used memory of the two objects based on the {@link DatasetMemoryUsage#getLatestMemoryUsage}. + * Comparator to compare two {@link DatasetMemoryUsage} objects based on used memory of the two + * objects, {@link DatasetMemoryUsage#getLatestMemoryUsage}. 
*/ -public class LatestReportUsedMemoryIncreasedComparator implements Comparator { +public class LatestReportUsedMemoryComparator implements Comparator { @Override public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { @@ -23,17 +23,17 @@ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { return o1.getLatestMemoryUsage() == null ? -1 : 1; } - long o1MinDiff = + long o1MaxMemory = o1.getLatestMemoryUsage().stream() - .min(Comparator.comparingLong(MemoryUsage::usedMemory)) + .max(Comparator.comparingLong(MemoryUsage::usedMemory)) .get() .usedMemory(); - long o2MinDiff = + long o2MaxMemory = o2.getLatestMemoryUsage().stream() - .min(Comparator.comparingLong(MemoryUsage::usedMemory)) + .max(Comparator.comparingLong(MemoryUsage::usedMemory)) .get() .usedMemory(); - return Long.compare(o1MinDiff, o2MinDiff); + return Long.compare(o1MaxMemory, o2MaxMemory); } @Override diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index 0416e0ebbf..14ce5b0c46 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -227,7 +227,7 @@ public String generateLogString() { addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b, true); } if (datasetsMemoryUsageNoReference.size() > 0) { - datasetsMemoryUsageNoReference.sort(new LatestReportUsedMemoryIncreasedComparator()); + datasetsMemoryUsageNoReference.sort(new LatestReportUsedMemoryComparator()); addMemoryUsageReport( datasetsMemoryUsageNoReference.subList( 0, Math.min(datasetsMemoryUsageNoReference.size(), 
MEMORY_USAGE_COMPARE_MAX)), From 98c0275aba1ae9b2000c1413cb84677f41682ad5 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:40:28 -0400 Subject: [PATCH 17/30] orting from the highest to the lowest memory usage --- .../outputcomparator/io/ValidationPerformanceCollector.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index 14ce5b0c46..db7468d00f 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -227,7 +227,8 @@ public String generateLogString() { addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b, true); } if (datasetsMemoryUsageNoReference.size() > 0) { - datasetsMemoryUsageNoReference.sort(new LatestReportUsedMemoryComparator()); + // Sorting from the highest to the lowest memory usage + datasetsMemoryUsageNoReference.sort((new LatestReportUsedMemoryComparator()).reversed()); addMemoryUsageReport( datasetsMemoryUsageNoReference.subList( 0, Math.min(datasetsMemoryUsageNoReference.size(), MEMORY_USAGE_COMPARE_MAX)), From a16fdee3b92ba0a752d8bfb06ab7880aedb15279 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:51:11 -0400 Subject: [PATCH 18/30] improve acceptance tests documentation --- docs/ACCEPTANCE_TESTS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/ACCEPTANCE_TESTS.md b/docs/ACCEPTANCE_TESTS.md index d0d2e1ecc0..4360522199 100644 --- a/docs/ACCEPTANCE_TESTS.md +++ b/docs/ACCEPTANCE_TESTS.md @@ -120,6 +120,12 @@ The memory 
consumption section contains three tables. - The second, list the first 25 datasets that the difference decreased memory comparing with the main branch. - The third, list(not always visible) the first 25 datasets that were not available for comparison as the main branch didn't contain the memory usage information. +Memory usage is collected at critical points and persisted in the JSON report. The added snapshot points are: +- _GtfsFeedLoader.loadTables_: This is taken after the validator loads all files. +- _GtfsFeedLoader.executeMultiFileValidators_: This is taken after the validator executed all multi-file validators +- _org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate_: This is taken for the complete load and validation method. +- _ValidationRunner.run_: This is taken for the complete run of the validator, excluding report generation + ## Instructions to run the pipeline 1. Provide code changes by creating a new PR on the [GitHub repository](https://github.com/MobilityData/gtfs-validator); From db3632890f8f8bb574760bc446472f0d01fbf853 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:53:41 -0400 Subject: [PATCH 19/30] revert acceptance tests sample running --- .github/workflows/acceptance_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/acceptance_test.yml b/.github/workflows/acceptance_test.yml index ce9c650fbc..4759f4172e 100644 --- a/.github/workflows/acceptance_test.yml +++ b/.github/workflows/acceptance_test.yml @@ -127,7 +127,7 @@ jobs: - name: Set URL matrix id: set-matrix run: | - DATASETS=$(python3 scripts/mobility-database-harvester/harvest_latest_versions.py -d scripts/mobility-database-harvester/datasets_metadata -l gtfs_latest_versions.json -s) + DATASETS=$(python3 scripts/mobility-database-harvester/harvest_latest_versions.py -d scripts/mobility-database-harvester/datasets_metadata -l gtfs_latest_versions.json) echo
$DATASETS echo "matrix=$DATASETS" >> $GITHUB_OUTPUT - name: Persist metadata From 2a7a1f683ac89220a4ae41f8092d75711a28b3ab Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:45:20 -0400 Subject: [PATCH 20/30] remove large feeds from exclude list --- .../mobility-database-harvester/harvest_latest_versions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/mobility-database-harvester/harvest_latest_versions.py b/scripts/mobility-database-harvester/harvest_latest_versions.py index 0ddef96fba..3d250d854b 100644 --- a/scripts/mobility-database-harvester/harvest_latest_versions.py +++ b/scripts/mobility-database-harvester/harvest_latest_versions.py @@ -32,9 +32,9 @@ # Sources to exclude because they are too big for the workflow. SOURCES_TO_EXCLUDE = [ - "de-unknown-rursee-schifffahrt-kg-gtfs-784", - "de-unknown-ulmer-eisenbahnfreunde-gtfs-1081", - "no-unknown-agder-kollektivtrafikk-as-gtfs-1078" + # "de-unknown-rursee-schifffahrt-kg-gtfs-784", + # "de-unknown-ulmer-eisenbahnfreunde-gtfs-1081", + # "no-unknown-agder-kollektivtrafikk-as-gtfs-1078" ] # Google Cloud constants From 5f644fc4866f22f0d67b01f95e8dcec72e72d071 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Wed, 9 Oct 2024 13:05:01 -0400 Subject: [PATCH 21/30] fix formatting --- .../gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java | 1 - 1 file changed, 1 deletion(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java index 1367615056..01accf7864 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/DatasetMemoryUsage.java @@ -1,7 
+1,6 @@ package org.mobilitydata.gtfsvalidator.outputcomparator.io; import com.google.common.flogger.FluentLogger; - import java.util.Collections; import java.util.HashMap; import java.util.List; From c7ef80964946371d8eb6a8bf7901bd9525aae7ec Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Wed, 9 Oct 2024 14:44:59 -0400 Subject: [PATCH 22/30] fix ordering --- .../io/UsedMemoryIncreasedComparator.java | 19 ++++++++++--------- .../io/ValidationPerformanceCollector.java | 2 +- .../harvest_latest_versions.py | 6 +++--- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java index 9b37b6bd8e..af05b3f15b 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java @@ -35,14 +35,15 @@ public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { return o1.getLatestMemoryUsage() == null ? 
-1 : 1; } - long o1MinDiff = - getMinimumDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey()); - long o2MinDiff = - getMinimumDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey()); - return Long.compare(o1MinDiff, o2MinDiff); + long o1MaxDiff = + getMaxDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey()); + long o2MaxDiff = + getMaxDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey()); + // Reversing the comparison as we need the major memory usage first in a sorted list + return Long.compare(o2MaxDiff, o1MaxDiff); } - private long getMinimumDifferenceByKey( + private long getMaxDifferenceByKey( Map referenceMemoryUsage, Map latestMemoryUsage) { Set keys = new HashSet<>(); keys.addAll(latestMemoryUsage.keySet()); @@ -50,8 +51,8 @@ private long getMinimumDifferenceByKey( return keys.stream() .filter(key -> latestMemoryUsage.containsKey(key) && referenceMemoryUsage.containsKey(key)) .filter(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key) != 0) - .mapToLong(key -> referenceMemoryUsage.get(key) - latestMemoryUsage.get(key)) - .min() - .orElse(Long.MAX_VALUE); + .mapToLong(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key)) + .max() + .orElse(Long.MIN_VALUE); } } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index db7468d00f..850cb0384a 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -8,7 +8,7 @@ public class ValidationPerformanceCollector { - public static final int MEMORY_USAGE_COMPARE_MAX = 25; + public static final 
int MEMORY_USAGE_COMPARE_MAX = 10; private final Map referenceTimes; private final Map latestTimes; private final BoundedPriorityQueue datasetsDecreasedMemoryUsage; diff --git a/scripts/mobility-database-harvester/harvest_latest_versions.py b/scripts/mobility-database-harvester/harvest_latest_versions.py index 3d250d854b..0ddef96fba 100644 --- a/scripts/mobility-database-harvester/harvest_latest_versions.py +++ b/scripts/mobility-database-harvester/harvest_latest_versions.py @@ -32,9 +32,9 @@ # Sources to exclude because they are too big for the workflow. SOURCES_TO_EXCLUDE = [ - # "de-unknown-rursee-schifffahrt-kg-gtfs-784", - # "de-unknown-ulmer-eisenbahnfreunde-gtfs-1081", - # "no-unknown-agder-kollektivtrafikk-as-gtfs-1078" + "de-unknown-rursee-schifffahrt-kg-gtfs-784", + "de-unknown-ulmer-eisenbahnfreunde-gtfs-1081", + "no-unknown-agder-kollektivtrafikk-as-gtfs-1078" ] # Google Cloud constants From b50f9bff19dd315bc3f8b41fb10e8ca70326c3c6 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Wed, 9 Oct 2024 14:52:22 -0400 Subject: [PATCH 23/30] fix unit test --- .../outputcomparator/cli/ValidationReportComparatorTest.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java index 6cf2ac3a7d..90282a06ff 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java @@ -18,6 +18,7 @@ import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; import org.mobilitydata.gtfsvalidator.notice.ValidationNotice; import 
org.mobilitydata.gtfsvalidator.outputcomparator.cli.ValidationReportComparator.Result; +import org.mobilitydata.gtfsvalidator.outputcomparator.io.ValidationPerformanceCollector; import org.mobilitydata.gtfsvalidator.outputcomparator.model.SourceUrlContainer; import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.AcceptanceReport; import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.AffectedSource; @@ -144,7 +145,9 @@ public void addedErrorNotice_summaryString() throws Exception { + "\n\n" + "
\n" + "📜 Memory Consumption\n" - + "

List of 25 datasets(no reference available).

\n\n" + + "

List of " + + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX + + " datasets(no reference available).

\n\n" + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n" + "|-----------------------------|-------------------|----------------|----------------|\n" + "
\n\n"); From 4bd6064872cdbfef5f2bea8519532bf4df35d57e Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Wed, 9 Oct 2024 17:33:00 -0400 Subject: [PATCH 24/30] add decreased memory comparator --- .../io/BoundedPriorityQueue.java | 56 ------------------ .../io/UsedMemoryDecreasedComparator.java | 57 +++++++++++++++++++ .../io/ValidationPerformanceCollector.java | 52 ++++++----------- .../cli/ValidationReportComparatorTest.java | 11 +--- 4 files changed, 76 insertions(+), 100 deletions(-) delete mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java create mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java deleted file mode 100644 index 8e49308ffd..0000000000 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/BoundedPriorityQueue.java +++ /dev/null @@ -1,56 +0,0 @@ -package org.mobilitydata.gtfsvalidator.outputcomparator.io; - -import java.util.Comparator; -import java.util.PriorityQueue; - -/** - * A bounded priority queue that keeps the N smallest elements. If the queue is full and a new - * element is offered, the largest element is removed. The smallest element is computed using a - * comparator or its natural order. 
- * - * @param - */ -public class BoundedPriorityQueue extends PriorityQueue { - private final int maxCapacity; - - public BoundedPriorityQueue(int maxCapacity) { - super(); - if (maxCapacity <= 0) { - throw new IllegalArgumentException("Max capacity must be greater than zero"); - } - this.maxCapacity = maxCapacity; - } - - public BoundedPriorityQueue(int maxCapacity, int initialCapacity, Comparator comparator) { - super(initialCapacity, comparator); - if (maxCapacity <= 0) { - throw new IllegalArgumentException("Max capacity must be greater than zero"); - } - this.maxCapacity = maxCapacity; - } - - @Override - public boolean offer(E e) { - if (size() >= maxCapacity) { - E head = peek(); - if (head != null && compare(e, head) > 0) { - poll(); - } else { - return false; - } - } - return super.offer(e); - } - - @SuppressWarnings("unchecked") - private int compare(E a, E b) { - if (comparator() != null) { - return comparator().compare(a, b); - } - return ((Comparable) a).compareTo(b); - } - - public int getMaxCapacity() { - return maxCapacity; - } -} diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java new file mode 100644 index 0000000000..899e2169ec --- /dev/null +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java @@ -0,0 +1,57 @@ +package org.mobilitydata.gtfsvalidator.outputcomparator.io; + +import java.util.Comparator; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Comparator to compare two {@link DatasetMemoryUsage} objects based on the difference between the + * used memory of the two objects. The difference is calculated by comparing the used memory of the + * two objects for each key present in both objects. 
If a key is present in one object but not in + * the other, the key it is ignored. This comparator is used to sort DatasetMemoryUsage by the + * minimum difference between the used memory of the two. This means the order is by the dataset + * validation that decreased the memory. + */ +public class UsedMemoryDecreasedComparator implements Comparator { + + @Override + public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { + if (o1 == o2) { + return 0; + } + if (o1 == null || o2 == null) { + return o1 == null ? -1 : 1; + } + if (o1.getReferenceMemoryUsage() == null + && o1.getLatestMemoryUsage() == null + && o2.getReferenceMemoryUsage() == null + && o2.getLatestMemoryUsage() == null) { + return 0; + } + if (o1.getReferenceMemoryUsage() == null || o2.getReferenceMemoryUsage() == null) { + return o1.getReferenceMemoryUsage() == null ? -1 : 1; + } + if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { + return o1.getLatestMemoryUsage() == null ? -1 : 1; + } + long o1MinDiff = + getMinDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey()); + long o2MinDiff = + getMinDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey()); + return Long.compare(o1MinDiff, o2MinDiff); + } + + private long getMinDifferenceByKey( + Map referenceMemoryUsage, Map latestMemoryUsage) { + Set keys = new HashSet<>(); + keys.addAll(latestMemoryUsage.keySet()); + keys.addAll(referenceMemoryUsage.keySet()); + return keys.stream() + .filter(key -> latestMemoryUsage.containsKey(key) && referenceMemoryUsage.containsKey(key)) + .filter(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key) != 0) + .mapToLong(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key)) + .max() + .orElse(Long.MAX_VALUE); + } +} diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java 
b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index 850cb0384a..b66c808b4e 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -11,24 +11,14 @@ public class ValidationPerformanceCollector { public static final int MEMORY_USAGE_COMPARE_MAX = 10; private final Map referenceTimes; private final Map latestTimes; - private final BoundedPriorityQueue datasetsDecreasedMemoryUsage; - private final BoundedPriorityQueue datasetsIncreasedMemoryUsage; private final List datasetsMemoryUsageNoReference; + private final List datasetsMemoryUsageWithReference; public ValidationPerformanceCollector() { this.referenceTimes = new HashMap<>(); this.latestTimes = new HashMap<>(); - this.datasetsDecreasedMemoryUsage = - new BoundedPriorityQueue<>( - MEMORY_USAGE_COMPARE_MAX, - MEMORY_USAGE_COMPARE_MAX, - (new UsedMemoryIncreasedComparator().reversed())); - this.datasetsIncreasedMemoryUsage = - new BoundedPriorityQueue<>( - MEMORY_USAGE_COMPARE_MAX, - MEMORY_USAGE_COMPARE_MAX, - new UsedMemoryIncreasedComparator()); this.datasetsMemoryUsageNoReference = new ArrayList<>(); + this.datasetsMemoryUsageWithReference = new ArrayList<>(); } public void addReferenceTime(String sourceId, Double time) { @@ -211,24 +201,26 @@ public String generateLogString() { b.append("\n\n"); - if (datasetsIncreasedMemoryUsage.size() > 0 - || datasetsDecreasedMemoryUsage.size() > 0 - || datasetsMemoryUsageNoReference.size() > 0) { + if (datasetsMemoryUsageWithReference.size() > 0) { b.append("
\n"); b.append("📜 Memory Consumption\n"); - if (datasetsIncreasedMemoryUsage.size() > 0) { - List increasedMemoryUsages = - getDatasetMemoryUsages(datasetsIncreasedMemoryUsage); - addMemoryUsageReport(increasedMemoryUsages, "memory has increased", b, true); - } - if (datasetsDecreasedMemoryUsage.size() > 0) { - List decreasedMemoryUsages = - getDatasetMemoryUsages(datasetsDecreasedMemoryUsage); - addMemoryUsageReport(decreasedMemoryUsages, "memory has decreased", b, true); + if (datasetsMemoryUsageWithReference.size() > 0) { + datasetsMemoryUsageWithReference.sort(new UsedMemoryIncreasedComparator()); + addMemoryUsageReport( + datasetsMemoryUsageWithReference.subList( + 0, Math.min(datasetsMemoryUsageWithReference.size(), MEMORY_USAGE_COMPARE_MAX)), + "memory has increased", + b, + true); + datasetsMemoryUsageWithReference.sort(new UsedMemoryDecreasedComparator()); + var decreasedList = + datasetsMemoryUsageWithReference.subList( + 0, Math.min(datasetsMemoryUsageWithReference.size(), MEMORY_USAGE_COMPARE_MAX)); + addMemoryUsageReport(decreasedList, "memory has decreased", b, true); } if (datasetsMemoryUsageNoReference.size() > 0) { // Sorting from the highest to the lowest memory usage - datasetsMemoryUsageNoReference.sort((new LatestReportUsedMemoryComparator()).reversed()); + datasetsMemoryUsageNoReference.sort(new UsedMemoryDecreasedComparator()); addMemoryUsageReport( datasetsMemoryUsageNoReference.subList( 0, Math.min(datasetsMemoryUsageNoReference.size(), MEMORY_USAGE_COMPARE_MAX)), @@ -241,13 +233,6 @@ public String generateLogString() { return b.toString(); } - private List getDatasetMemoryUsages( - BoundedPriorityQueue datasetsMemoryUsage) { - List increasedMemoryUsages = new ArrayList<>(datasetsMemoryUsage); - increasedMemoryUsages.sort(datasetsMemoryUsage.comparator()); - return increasedMemoryUsages; - } - private void addMemoryUsageReport( List memoryUsages, String order, @@ -332,8 +317,7 @@ private void compareValidationReportMemoryUsage( && 
referenceReport.getMemoryUsageRecords().size() > 0 && latestReport.getMemoryUsageRecords() != null && latestReport.getMemoryUsageRecords().size() > 0) { - datasetsIncreasedMemoryUsage.offer(datasetMemoryUsage); - datasetsDecreasedMemoryUsage.offer(datasetMemoryUsage); + datasetsMemoryUsageWithReference.add(datasetMemoryUsage); } else { datasetsMemoryUsageNoReference.add(datasetMemoryUsage); } diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java index 90282a06ff..18e8d66e56 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java @@ -18,7 +18,6 @@ import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; import org.mobilitydata.gtfsvalidator.notice.ValidationNotice; import org.mobilitydata.gtfsvalidator.outputcomparator.cli.ValidationReportComparator.Result; -import org.mobilitydata.gtfsvalidator.outputcomparator.io.ValidationPerformanceCollector; import org.mobilitydata.gtfsvalidator.outputcomparator.model.SourceUrlContainer; import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.AcceptanceReport; import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.AffectedSource; @@ -142,15 +141,7 @@ public void addedErrorNotice_summaryString() throws Exception { + "\n" + "| Time Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" - + "
\n\n" - + "
\n" - + "📜 Memory Consumption\n" - + "

List of " - + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX - + " datasets(no reference available).

\n\n" - + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n" - + "|-----------------------------|-------------------|----------------|----------------|\n" - + "
\n\n"); + + "\n\n\n"); } @Test From 053cdbea6e1b1e59d393f7dc11e54e76eb277b8b Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:48:44 -0400 Subject: [PATCH 25/30] add memory metrics --- .../io/ValidationPerformanceCollector.java | 276 +++++++++++++----- .../ValidationPerformanceCollectorTest.java | 44 ++- 2 files changed, 238 insertions(+), 82 deletions(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index b66c808b4e..eaa9621c16 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -2,6 +2,8 @@ import java.util.*; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Function; +import java.util.stream.Collectors; import org.mobilitydata.gtfsvalidator.model.ValidationReport; import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.ValidationPerformance; import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; @@ -9,6 +11,8 @@ public class ValidationPerformanceCollector { public static final int MEMORY_USAGE_COMPARE_MAX = 10; + public static final String MEMORY_PIVOT_KEY = + "org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate"; private final Map referenceTimes; private final Map latestTimes; private final List datasetsMemoryUsageNoReference; @@ -29,11 +33,11 @@ public void addLatestTime(String sourceId, Double time) { latestTimes.put(sourceId, time); } - private Double computeAverage(List times) { + private Double computeAverage(Collection times) { return times.stream().mapToDouble(Double::doubleValue).average().orElse(Double.NaN); } - 
private Double computeMedian(List times) { + private Double computeMedian(Collection times) { if (times.isEmpty()) { return Double.NaN; } @@ -49,31 +53,37 @@ private Double computeMedian(List times) { return median; } - private Double computeStandardDeviation(List times) { + private Double computeStandardDeviation(Collection times) { double mean = computeAverage(times); return Math.sqrt( times.stream().mapToDouble(time -> Math.pow(time - mean, 2)).average().orElse(Double.NaN)); } - private Double computeMax(List times) { + private Double computeMax(Collection times) { return times.stream().mapToDouble(Double::doubleValue).max().orElse(Double.NaN); } - private Double computeMin(List times) { + private Double computeMin(Collection times) { return times.stream().mapToDouble(Double::doubleValue).min().orElse(Double.NaN); } - private String formatMetrics(String metric, String datasetId, Double reference, Double latest) { + private String formatMetrics( + String metric, + String datasetId, + Double reference, + Double latest, + Function render) { String diff; if (reference.isNaN() || latest.isNaN()) { diff = "N/A"; } else { double difference = latest - reference; String arrow = difference > 0 ? 
"⬆️+" : "⬇️"; - diff = String.format("%s%.2f", arrow, difference); + diff = String.format("%s%s", arrow, render.apply(difference)); } return String.format( - "| %s | %s | %.2f | %.2f | %s |\n", metric, datasetId, reference, latest, diff); + "| %s | %s | %s | %s | %s |\n", + metric, datasetId, render.apply(reference), render.apply(latest), diff); } private static String getMemoryDiff(Long reference, Long latest) { @@ -124,70 +134,8 @@ public String generateLogString() { allLatestTimes.add(latestTimes); } - if (!allReferenceTimes.isEmpty() && !allLatestTimes.isEmpty()) { - Double avgReference = computeAverage(allReferenceTimes); - Double avgLatest = computeAverage(allLatestTimes); - Double medianReference = computeMedian(allReferenceTimes); - Double medianLatest = computeMedian(allLatestTimes); - Double stdDevReference = computeStandardDeviation(allReferenceTimes); - Double stdDevLatest = computeStandardDeviation(allLatestTimes); - - b.append(formatMetrics("Average", "--", avgReference, avgLatest)) - .append(formatMetrics("Median", "--", medianReference, medianLatest)) - .append(formatMetrics("Standard Deviation", "--", stdDevReference, stdDevLatest)); - } - - if (!allReferenceTimes.isEmpty()) { - Double minReference = computeMin(allReferenceTimes); - String minReferenceId = - referenceTimes.entrySet().stream() - .filter(entry -> Objects.equals(entry.getValue(), minReference)) - .map(Map.Entry::getKey) - .findFirst() - .orElse("N/A"); - - Double maxReference = computeMax(allReferenceTimes); - String maxReferenceId = - referenceTimes.entrySet().stream() - .filter(entry -> Objects.equals(entry.getValue(), maxReference)) - .map(Map.Entry::getKey) - .findFirst() - .orElse("N/A"); - - Double minLatest = latestTimes.getOrDefault(minReferenceId, Double.NaN); - Double maxLatest = latestTimes.getOrDefault(maxReferenceId, Double.NaN); - - b.append( - formatMetrics( - "Minimum in References Reports", minReferenceId, minReference, minLatest)) - .append( - formatMetrics( - 
"Maximum in Reference Reports", maxReferenceId, maxReference, maxLatest)); - } - - if (!allLatestTimes.isEmpty()) { - Double minLatest = computeMin(allLatestTimes); - String minLatestId = - latestTimes.entrySet().stream() - .filter(entry -> Objects.equals(entry.getValue(), minLatest)) - .map(Map.Entry::getKey) - .findFirst() - .orElse("N/A"); - - Double maxLatest = computeMax(allLatestTimes); - String maxLatestId = - latestTimes.entrySet().stream() - .filter(entry -> Objects.equals(entry.getValue(), maxLatest)) - .map(Map.Entry::getKey) - .findFirst() - .orElse("N/A"); - - Double minReference = referenceTimes.getOrDefault(minLatestId, Double.NaN); - Double maxReference = referenceTimes.getOrDefault(maxLatestId, Double.NaN); - - b.append(formatMetrics("Minimum in Latest Reports", minLatestId, minReference, minLatest)) - .append(formatMetrics("Maximum in Latest Reports", maxLatestId, maxReference, maxLatest)); - } + generatePerformanceMetricsLog( + referenceTimes, latestTimes, b, value -> String.format("%.2f", value)); // Add warning message for feeds that are missing validation times either in reference or latest if (!warnings.isEmpty()) { @@ -202,8 +150,48 @@ public String generateLogString() { b.append("\n\n"); if (datasetsMemoryUsageWithReference.size() > 0) { + Map referenceMemoryUsageById = + datasetsMemoryUsageWithReference.stream() + .filter( + datasetMemoryUsage -> + datasetMemoryUsage.getReferenceUsedMemoryByKey().get(MEMORY_PIVOT_KEY) + != null) + .collect( + Collectors.toMap( + DatasetMemoryUsage::getDatasetId, + datasetMemoryUsage -> + datasetMemoryUsage + .getReferenceUsedMemoryByKey() + .get(MEMORY_PIVOT_KEY) + .doubleValue())); + Map latestMemoryUsageById = + datasetsMemoryUsageWithReference.stream() + .filter( + datasetMemoryUsage -> + datasetMemoryUsage.getLatestUsedMemoryByKey().get(MEMORY_PIVOT_KEY) != null) + .collect( + Collectors.toMap( + DatasetMemoryUsage::getDatasetId, + datasetMemoryUsage -> + datasetMemoryUsage + 
.getLatestUsedMemoryByKey() + .get(MEMORY_PIVOT_KEY) + .doubleValue())); + b.append("
\n"); b.append("📜 Memory Consumption\n"); + + b.append( + "| Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n") + .append( + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"); + + generatePerformanceMetricsLog( + referenceMemoryUsageById, + latestMemoryUsageById, + b, + ValidationPerformanceCollector::convertToHumanReadableMemory); + if (datasetsMemoryUsageWithReference.size() > 0) { datasetsMemoryUsageWithReference.sort(new UsedMemoryIncreasedComparator()); addMemoryUsageReport( @@ -233,6 +221,76 @@ public String generateLogString() { return b.toString(); } + private void generatePerformanceMetricsLog( + Map references, + Map latests, + StringBuilder b, + Function render) { + PerformanceMetrics performanceMetrics = computeMetrics(references, latests); + if (!references.isEmpty() && !latests.isEmpty()) { + b.append( + formatMetrics( + "Average", + "--", + performanceMetrics.avgReference, + performanceMetrics.avgLatest, + render)) + .append( + formatMetrics( + "Median", + "--", + performanceMetrics.medianReference, + performanceMetrics.medianLatest, + render)) + .append( + formatMetrics( + "Standard Deviation", + "--", + performanceMetrics.stdDevReference, + performanceMetrics.stdDevLatest, + render)); + } + + if (!references.isEmpty()) { + Double minLatest = latestTimes.getOrDefault(performanceMetrics.minReferenceId, Double.NaN); + Double maxLatest = latestTimes.getOrDefault(performanceMetrics.maxReferenceId, Double.NaN); + b.append( + formatMetrics( + "Minimum in References Reports", + performanceMetrics.minReferenceId, + performanceMetrics.minReference, + minLatest, + render)) + .append( + formatMetrics( + "Maximum in Reference Reports", + performanceMetrics.maxReferenceId, + performanceMetrics.maxReference, + maxLatest, + render)); + } + + if (!latests.isEmpty()) { + Double minReference = referenceTimes.getOrDefault(performanceMetrics.minLatestId, Double.NaN); + Double maxReference = 
referenceTimes.getOrDefault(performanceMetrics.maxLatestId, Double.NaN); + + b.append( + formatMetrics( + "Minimum in Latest Reports", + performanceMetrics.minLatestId, + minReference, + performanceMetrics.minLatest, + render)) + .append( + formatMetrics( + "Maximum in Latest Reports", + performanceMetrics.maxLatestId, + maxReference, + performanceMetrics.maxLatest, + render)); + } + } + private void addMemoryUsageReport( List memoryUsages, String order, @@ -336,4 +394,78 @@ public List toReport() { } return affectedSources; } + + private PerformanceMetrics computeMetrics( + Map allReferencesMap, Map allLatestsMap) { + Collection allReferences = allReferencesMap.values(); + Collection allLatest = allLatestsMap.values(); + PerformanceMetrics performanceMetrics = new PerformanceMetrics(); + if (!allReferences.isEmpty() && !allLatest.isEmpty()) { + performanceMetrics.avgReference = computeAverage(allReferences); + performanceMetrics.avgLatest = computeAverage(allLatest); + performanceMetrics.medianReference = computeMedian(allReferences); + performanceMetrics.medianLatest = computeMedian(allLatest); + performanceMetrics.stdDevReference = computeStandardDeviation(allReferences); + performanceMetrics.stdDevLatest = computeStandardDeviation(allLatest); + } + + if (!allReferences.isEmpty()) { + performanceMetrics.minReference = computeMin(allReferences); + performanceMetrics.minReferenceId = + referenceTimes.entrySet().stream() + .filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.minReference)) + .map(Map.Entry::getKey) + .findFirst() + .orElse("N/A"); + + performanceMetrics.maxReference = computeMax(allReferences); + performanceMetrics.maxReferenceId = + referenceTimes.entrySet().stream() + .filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.maxReference)) + .map(Map.Entry::getKey) + .findFirst() + .orElse("N/A"); + } + + if (!allLatest.isEmpty()) { + performanceMetrics.minLatest = computeMin(allLatest); + performanceMetrics.minLatestId 
= + latestTimes.entrySet().stream() + .filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.minLatest)) + .map(Map.Entry::getKey) + .findFirst() + .orElse("N/A"); + + performanceMetrics.maxLatest = computeMax(allLatest); + performanceMetrics.maxLatestId = + latestTimes.entrySet().stream() + .filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.maxLatest)) + .map(Map.Entry::getKey) + .findFirst() + .orElse("N/A"); + } + return performanceMetrics; + } + + private static String convertToHumanReadableMemory(Double bytes) { + // Ignoring the decimals in bytes + return MemoryUsage.convertToHumanReadableMemory(bytes.longValue()); + } +} + +class PerformanceMetrics { + Double minReference; + String minReferenceId; + Double maxReference; + Double minLatest; + Double maxLatest; + String minLatestId; + String maxLatestId; + String maxReferenceId; + Double avgReference; + Double avgLatest; + Double medianReference; + Double medianLatest; + Double stdDevReference; + Double stdDevLatest; } diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java index 82c204b20c..76ccce5d69 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java @@ -32,7 +32,12 @@ public void generateLogString_test() { Collections.EMPTY_SET, null, Arrays.asList( - new MemoryUsage("key1", baseMemory, baseMemory + baseMemory * 10, 200, 50L), + new MemoryUsage( + ValidationPerformanceCollector.MEMORY_PIVOT_KEY, + baseMemory, + baseMemory + baseMemory * 10, + 200, + 50L), new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))), new ValidationReport(Collections.EMPTY_SET, 16.0, 
Collections.EMPTY_LIST)); // Memory usage increased as there is less free memory @@ -42,13 +47,23 @@ public void generateLogString_test() { Collections.EMPTY_SET, null, Arrays.asList( - new MemoryUsage("key1", baseMemory, baseMemory, 200, 50L), + new MemoryUsage( + ValidationPerformanceCollector.MEMORY_PIVOT_KEY, + baseMemory, + baseMemory, + 200, + 50L), new MemoryUsage("key2", baseMemory, baseMemory, 200, 50L))), new ValidationReport( Collections.EMPTY_SET, null, Arrays.asList( - new MemoryUsage("key1", baseMemory, baseMemory - baseMemory / 2, 200, null), + new MemoryUsage( + ValidationPerformanceCollector.MEMORY_PIVOT_KEY, + baseMemory, + baseMemory - baseMemory / 2, + 200, + null), new MemoryUsage("key2", baseMemory, baseMemory - baseMemory / 2, 200, null)))); // // Memory usage decreased as there is more free memory @@ -77,12 +92,12 @@ public void generateLogString_test() { + "\n" + "| Time Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" - + "| Average | -- | 17.00 | 20.00 | ⬆\uFE0F+3.00 |\n" - + "| Median | -- | 17.00 | 20.00 | ⬆\uFE0F+3.00 |\n" - + "| Standard Deviation | -- | 3.00 | 2.00 | ⬇\uFE0F-1.00 |\n" + + "| Average | -- | 17.00 | 18.67 | ⬆\uFE0F+1.67 |\n" + + "| Median | -- | 17.00 | 18.00 | ⬆\uFE0F+1.00 |\n" + + "| Standard Deviation | -- | 3.00 | 2.49 | ⬇\uFE0F-0.51 |\n" + "| Minimum in References Reports | feed-id-a | 14.00 | 18.00 | ⬆\uFE0F+4.00 |\n" + "| Maximum in Reference Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n" - + "| Minimum in Latest Reports | feed-id-a | 14.00 | 18.00 | ⬆\uFE0F+4.00 |\n" + + "| Minimum in Latest Reports | feed-id-m1 | NaN | 16.00 | N/A |\n" + "| Maximum in Latest Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n" + "#### ⚠️ Warnings\n\n" + "The following dataset IDs are missing validation times either in reference or latest:\n" @@ -90,14 +105,23 @@ public void 
generateLogString_test() { + "
\n\n" + "
\n" + "📜 Memory Consumption\n" + + "| Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + + "| Average | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + + "| Median | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + + "| Standard Deviation | -- | 0 bytes | 0 bytes | ⬇️0 bytes |\n" + + "| Minimum in References Reports | N/A | 0 bytes | 0 bytes | N/A |\n" + + "| Maximum in Reference Reports | N/A | 0 bytes | 0 bytes | N/A |\n" + + "| Minimum in Latest Reports | N/A | 0 bytes | 488.28 KiB | N/A |\n" + + "| Maximum in Latest Reports | N/A | 0 bytes | 488.28 KiB | N/A |\n" + "

List of " + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX + " datasets(memory has increased).

\n\n" + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest | Difference |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + "| feed-id-m2 | | | | |\n" - + "| | key1 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + "| | key2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + "| feed-id-m3 | | | | |\n" + "| | key3 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n" + "| | key4 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n" @@ -110,16 +134,16 @@ public void generateLogString_test() { + "| | key3 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n" + "| | key4 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n" + "| feed-id-m2 | | | | |\n" - + "| | key1 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + "| | key2 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + "

List of " + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX + " datasets(no reference available).

\n\n" + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n" + "|-----------------------------|-------------------|----------------|----------------|\n" + "| feed-id-m1 | | | |\n" - + "| | key1 | -9.54 MiB | N/A |\n" + "| | key2 | 0 bytes | N/A |\n" + + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | -9.54 MiB | N/A |\n" + "
\n"; // Assert that the generated log string matches the expected log string assertThat(logString).isEqualTo(expectedLogString); From 2513c234897ef135e1e3e6b774008eb95cf2429c Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:39:17 -0400 Subject: [PATCH 26/30] fix comment formatting --- .../io/ValidationPerformanceCollector.java | 16 ++++++++-------- .../io/ValidationPerformanceCollectorTest.java | 10 +++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index eaa9621c16..f62dcdfac7 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -179,7 +179,7 @@ public String generateLogString() { .doubleValue())); b.append("
\n"); - b.append("📜 Memory Consumption\n"); + b.append("📜 Memory Consumption\n\n"); b.append( "| Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n") @@ -396,9 +396,9 @@ public List toReport() { } private PerformanceMetrics computeMetrics( - Map allReferencesMap, Map allLatestsMap) { - Collection allReferences = allReferencesMap.values(); - Collection allLatest = allLatestsMap.values(); + Map referencesById, Map latestsById) { + Collection allReferences = referencesById.values(); + Collection allLatest = latestsById.values(); PerformanceMetrics performanceMetrics = new PerformanceMetrics(); if (!allReferences.isEmpty() && !allLatest.isEmpty()) { performanceMetrics.avgReference = computeAverage(allReferences); @@ -412,7 +412,7 @@ private PerformanceMetrics computeMetrics( if (!allReferences.isEmpty()) { performanceMetrics.minReference = computeMin(allReferences); performanceMetrics.minReferenceId = - referenceTimes.entrySet().stream() + referencesById.entrySet().stream() .filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.minReference)) .map(Map.Entry::getKey) .findFirst() @@ -420,7 +420,7 @@ private PerformanceMetrics computeMetrics( performanceMetrics.maxReference = computeMax(allReferences); performanceMetrics.maxReferenceId = - referenceTimes.entrySet().stream() + referencesById.entrySet().stream() .filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.maxReference)) .map(Map.Entry::getKey) .findFirst() @@ -430,7 +430,7 @@ private PerformanceMetrics computeMetrics( if (!allLatest.isEmpty()) { performanceMetrics.minLatest = computeMin(allLatest); performanceMetrics.minLatestId = - latestTimes.entrySet().stream() + latestsById.entrySet().stream() .filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.minLatest)) .map(Map.Entry::getKey) .findFirst() @@ -438,7 +438,7 @@ private PerformanceMetrics computeMetrics( performanceMetrics.maxLatest = computeMax(allLatest); performanceMetrics.maxLatestId = - 
latestTimes.entrySet().stream() + latestsById.entrySet().stream() .filter(entry -> Objects.equals(entry.getValue(), performanceMetrics.maxLatest)) .map(Map.Entry::getKey) .findFirst() diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java index 76ccce5d69..a5ba6fb01b 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java @@ -104,16 +104,16 @@ public void generateLogString_test() { + "feed-id-m1\n\n" + "
\n\n" + "
\n" - + "📜 Memory Consumption\n" + + "📜 Memory Consumption\n\n" + "| Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + "| Average | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + "| Median | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + "| Standard Deviation | -- | 0 bytes | 0 bytes | ⬇️0 bytes |\n" - + "| Minimum in References Reports | N/A | 0 bytes | 0 bytes | N/A |\n" - + "| Maximum in Reference Reports | N/A | 0 bytes | 0 bytes | N/A |\n" - + "| Minimum in Latest Reports | N/A | 0 bytes | 488.28 KiB | N/A |\n" - + "| Maximum in Latest Reports | N/A | 0 bytes | 488.28 KiB | N/A |\n" + + "| Minimum in References Reports | feed-id-m2 | 0 bytes | 0 bytes | N/A |\n" + + "| Maximum in Reference Reports | feed-id-m2 | 0 bytes | 0 bytes | N/A |\n" + + "| Minimum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | N/A |\n" + + "| Maximum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | N/A |\n" + "

List of " + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX + " datasets(memory has increased).

\n\n" From 8a304bb083ef96280aafb1794a42164ecf144417 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 10 Oct 2024 17:30:56 -0400 Subject: [PATCH 27/30] fix invalid references --- .../io/ValidationPerformanceCollector.java | 8 ++++---- .../io/ValidationPerformanceCollectorTest.java | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index f62dcdfac7..f0123987ec 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -252,8 +252,8 @@ private void generatePerformanceMetricsLog( } if (!references.isEmpty()) { - Double minLatest = latestTimes.getOrDefault(performanceMetrics.minReferenceId, Double.NaN); - Double maxLatest = latestTimes.getOrDefault(performanceMetrics.maxReferenceId, Double.NaN); + Double minLatest = latests.getOrDefault(performanceMetrics.minReferenceId, Double.NaN); + Double maxLatest = latests.getOrDefault(performanceMetrics.maxReferenceId, Double.NaN); b.append( formatMetrics( "Minimum in References Reports", @@ -271,8 +271,8 @@ private void generatePerformanceMetricsLog( } if (!latests.isEmpty()) { - Double minReference = referenceTimes.getOrDefault(performanceMetrics.minLatestId, Double.NaN); - Double maxReference = referenceTimes.getOrDefault(performanceMetrics.maxLatestId, Double.NaN); + Double minReference = references.getOrDefault(performanceMetrics.minLatestId, Double.NaN); + Double maxReference = references.getOrDefault(performanceMetrics.maxLatestId, Double.NaN); b.append( formatMetrics( diff --git 
a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java index a5ba6fb01b..8ffd3c6fb9 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java @@ -110,10 +110,10 @@ public void generateLogString_test() { + "| Average | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + "| Median | -- | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" + "| Standard Deviation | -- | 0 bytes | 0 bytes | ⬇️0 bytes |\n" - + "| Minimum in References Reports | feed-id-m2 | 0 bytes | 0 bytes | N/A |\n" - + "| Maximum in Reference Reports | feed-id-m2 | 0 bytes | 0 bytes | N/A |\n" - + "| Minimum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | N/A |\n" - + "| Maximum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | N/A |\n" + + "| Minimum in References Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + + "| Maximum in Reference Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + + "| Minimum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + + "| Maximum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + "

List of " + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX + " datasets(memory has increased).

\n\n" From ce54a4d03ddcfb1b0c80ad6b59ed530c45f7b933 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:54:27 -0400 Subject: [PATCH 28/30] remove memory full list --- .../io/ValidationPerformanceCollector.java | 25 ---------------- .../ValidationPerformanceCollectorTest.java | 30 ------------------- 2 files changed, 55 deletions(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index f0123987ec..656a6b017a 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -191,31 +191,6 @@ public String generateLogString() { latestMemoryUsageById, b, ValidationPerformanceCollector::convertToHumanReadableMemory); - - if (datasetsMemoryUsageWithReference.size() > 0) { - datasetsMemoryUsageWithReference.sort(new UsedMemoryIncreasedComparator()); - addMemoryUsageReport( - datasetsMemoryUsageWithReference.subList( - 0, Math.min(datasetsMemoryUsageWithReference.size(), MEMORY_USAGE_COMPARE_MAX)), - "memory has increased", - b, - true); - datasetsMemoryUsageWithReference.sort(new UsedMemoryDecreasedComparator()); - var decreasedList = - datasetsMemoryUsageWithReference.subList( - 0, Math.min(datasetsMemoryUsageWithReference.size(), MEMORY_USAGE_COMPARE_MAX)); - addMemoryUsageReport(decreasedList, "memory has decreased", b, true); - } - if (datasetsMemoryUsageNoReference.size() > 0) { - // Sorting from the highest to the lowest memory usage - datasetsMemoryUsageNoReference.sort(new UsedMemoryDecreasedComparator()); - addMemoryUsageReport( - datasetsMemoryUsageNoReference.subList( - 0, 
Math.min(datasetsMemoryUsageNoReference.size(), MEMORY_USAGE_COMPARE_MAX)), - "no reference available", - b, - false); - } b.append("
\n"); } return b.toString(); diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java index 8ffd3c6fb9..12a13e31b8 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java @@ -114,36 +114,6 @@ public void generateLogString_test() { + "| Maximum in Reference Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + "| Minimum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" + "| Maximum in Latest Reports | feed-id-m2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" - + "

List of " - + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX - + " datasets(memory has increased).

\n\n" - + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest | Difference |\n" - + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" - + "| feed-id-m2 | | | | |\n" - + "| | key2 | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" - + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | 0 bytes | 488.28 KiB | ⬆\uFE0F+488.28 KiB |\n" - + "| feed-id-m3 | | | | |\n" - + "| | key3 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n" - + "| | key4 | -100 bytes | -976.56 KiB | ⬇\uFE0F-976.46 KiB |\n" - + "

List of " - + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX - + " datasets(memory has decreased).

\n\n" - + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest | Difference |\n" - + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" - + "| feed-id-m3 | | | | |\n" - + "| | key3 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n" - + "| | key4 | -100 bytes | -976.56 KiB | ⬇️-976.46 KiB |\n" - + "| feed-id-m2 | | | | |\n" - + "| | key2 | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" - + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | 0 bytes | 488.28 KiB | ⬆️+488.28 KiB |\n" - + "

List of " - + ValidationPerformanceCollector.MEMORY_USAGE_COMPARE_MAX - + " datasets(no reference available).

\n\n" - + "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |\n" - + "|-----------------------------|-------------------|----------------|----------------|\n" - + "| feed-id-m1 | | | |\n" - + "| | key2 | 0 bytes | N/A |\n" - + "| | org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate | -9.54 MiB | N/A |\n" + "\n"; // Assert that the generated log string matches the expected log string assertThat(logString).isEqualTo(expectedLogString); From b372c2c2d8ebc5fc47a04f123fd0ced6034b293a Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Fri, 11 Oct 2024 11:41:35 -0400 Subject: [PATCH 29/30] delete unused comparators --- .../io/UsedMemoryDecreasedComparator.java | 57 ------------------ .../io/UsedMemoryIncreasedComparator.java | 58 ------------------ .../MemoryUsageUsedMemoryComparatorTest.java | 60 ------------------- 3 files changed, 175 deletions(-) delete mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java delete mode 100644 output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java delete mode 100644 output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java deleted file mode 100644 index 899e2169ec..0000000000 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryDecreasedComparator.java +++ /dev/null @@ -1,57 +0,0 @@ -package org.mobilitydata.gtfsvalidator.outputcomparator.io; - -import java.util.Comparator; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -/** - * Comparator 
to compare two {@link DatasetMemoryUsage} objects based on the difference between the - * used memory of the two objects. The difference is calculated by comparing the used memory of the - * two objects for each key present in both objects. If a key is present in one object but not in - * the other, the key it is ignored. This comparator is used to sort DatasetMemoryUsage by the - * minimum difference between the used memory of the two. This means the order is by the dataset - * validation that decreased the memory. - */ -public class UsedMemoryDecreasedComparator implements Comparator { - - @Override - public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { - if (o1 == o2) { - return 0; - } - if (o1 == null || o2 == null) { - return o1 == null ? -1 : 1; - } - if (o1.getReferenceMemoryUsage() == null - && o1.getLatestMemoryUsage() == null - && o2.getReferenceMemoryUsage() == null - && o2.getLatestMemoryUsage() == null) { - return 0; - } - if (o1.getReferenceMemoryUsage() == null || o2.getReferenceMemoryUsage() == null) { - return o1.getReferenceMemoryUsage() == null ? -1 : 1; - } - if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { - return o1.getLatestMemoryUsage() == null ? 
-1 : 1; - } - long o1MinDiff = - getMinDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey()); - long o2MinDiff = - getMinDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey()); - return Long.compare(o1MinDiff, o2MinDiff); - } - - private long getMinDifferenceByKey( - Map referenceMemoryUsage, Map latestMemoryUsage) { - Set keys = new HashSet<>(); - keys.addAll(latestMemoryUsage.keySet()); - keys.addAll(referenceMemoryUsage.keySet()); - return keys.stream() - .filter(key -> latestMemoryUsage.containsKey(key) && referenceMemoryUsage.containsKey(key)) - .filter(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key) != 0) - .mapToLong(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key)) - .max() - .orElse(Long.MAX_VALUE); - } -} diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java deleted file mode 100644 index af05b3f15b..0000000000 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/UsedMemoryIncreasedComparator.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.mobilitydata.gtfsvalidator.outputcomparator.io; - -import java.util.Comparator; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -/** - * Comparator to compare two {@link DatasetMemoryUsage} objects based on the difference between the - * used memory of the two objects. The difference is calculated by comparing the used memory of the - * two objects for each key present in both objects. If a key is present in one object but not in - * the other, the key it is ignored. This comparator is used to sort DatasetMemoryUsage by the - * minimum difference between the used memory of the two. This means the order is by the dataset - * validation that increased the memory. 
- */ -public class UsedMemoryIncreasedComparator implements Comparator { - - @Override - public int compare(DatasetMemoryUsage o1, DatasetMemoryUsage o2) { - if (o1 == o2) { - return 0; - } - if (o1 == null || o2 == null) { - return o1 == null ? -1 : 1; - } - if (o1.getReferenceMemoryUsage() == null - && o1.getLatestMemoryUsage() == null - && o2.getReferenceMemoryUsage() == null - && o2.getLatestMemoryUsage() == null) { - return 0; - } - if (o1.getReferenceMemoryUsage() == null || o2.getReferenceMemoryUsage() == null) { - return o1.getReferenceMemoryUsage() == null ? -1 : 1; - } - if (o1.getLatestMemoryUsage() == null || o2.getLatestMemoryUsage() == null) { - return o1.getLatestMemoryUsage() == null ? -1 : 1; - } - long o1MaxDiff = - getMaxDifferenceByKey(o1.getReferenceUsedMemoryByKey(), o1.getLatestUsedMemoryByKey()); - long o2MaxDiff = - getMaxDifferenceByKey(o2.getReferenceUsedMemoryByKey(), o2.getLatestUsedMemoryByKey()); - // Reversing the comparison as we need the major memory usage first in a sorted list - return Long.compare(o2MaxDiff, o1MaxDiff); - } - - private long getMaxDifferenceByKey( - Map referenceMemoryUsage, Map latestMemoryUsage) { - Set keys = new HashSet<>(); - keys.addAll(latestMemoryUsage.keySet()); - keys.addAll(referenceMemoryUsage.keySet()); - return keys.stream() - .filter(key -> latestMemoryUsage.containsKey(key) && referenceMemoryUsage.containsKey(key)) - .filter(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key) != 0) - .mapToLong(key -> latestMemoryUsage.get(key) - referenceMemoryUsage.get(key)) - .max() - .orElse(Long.MIN_VALUE); - } -} diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java deleted file mode 100644 index b09e55387d..0000000000 --- 
a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/MemoryUsageUsedMemoryComparatorTest.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.mobilitydata.gtfsvalidator.outputcomparator.io; - -import static org.junit.Assert.assertEquals; - -import java.util.*; -import org.junit.Before; -import org.junit.Test; -import org.mobilitydata.gtfsvalidator.performance.MemoryUsage; - -public class MemoryUsageUsedMemoryComparatorTest { - - private UsedMemoryIncreasedComparator comparator; - - @Before - public void setUp() { - comparator = new UsedMemoryIncreasedComparator(); - } - - @Test - public void testCompare_equalMemoryUsage() { - List referenceMemoryUsage = getMemoryUsage(100L); - List latestMemoryUsage = getMemoryUsage(100L); - DatasetMemoryUsage o1 = - new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage); - DatasetMemoryUsage o2 = - new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage); - assertEquals(0, comparator.compare(o1, o2)); - } - - @Test - public void testCompare_firstHasMoreMemoryDifference() { - List referenceMemoryUsage = getMemoryUsage(100L); - List latestMemoryUsage = getMemoryUsage(50L); - DatasetMemoryUsage o1 = - new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage); - DatasetMemoryUsage o2 = - new DatasetMemoryUsage("dataset1", referenceMemoryUsage, getMemoryUsage(100L)); - assertEquals(-1, comparator.compare(o1, o2)); - } - - @Test - public void testCompare_firstHasLessMemoryDifference() { - List referenceMemoryUsage = getMemoryUsage(100L); - List latestMemoryUsage = getMemoryUsage(50L); - DatasetMemoryUsage o1 = - new DatasetMemoryUsage("dataset1", referenceMemoryUsage, latestMemoryUsage); - DatasetMemoryUsage o2 = - new DatasetMemoryUsage("dataset1", referenceMemoryUsage, getMemoryUsage(10L)); - assertEquals(1, comparator.compare(o1, o2)); - } - - private static List getMemoryUsage(long freeMemory) { - MemoryUsage[] referenceMemoryUsage = - new 
MemoryUsage[] { - new MemoryUsage("key1", 100L, freeMemory, 100L, 100L), - new MemoryUsage("key2", 100L, freeMemory, 100L, 100L), - }; - return Arrays.asList(referenceMemoryUsage); - } -} From f0bf1eb1c85404d81480ad5133bec4e64aca3bd2 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Fri, 11 Oct 2024 11:50:54 -0400 Subject: [PATCH 30/30] delete unused code --- .../io/ValidationPerformanceCollector.java | 63 ------------------- 1 file changed, 63 deletions(-) diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java index 656a6b017a..f4712651a6 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -1,7 +1,6 @@ package org.mobilitydata.gtfsvalidator.outputcomparator.io; import java.util.*; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; import java.util.stream.Collectors; import org.mobilitydata.gtfsvalidator.model.ValidationReport; @@ -10,7 +9,6 @@ public class ValidationPerformanceCollector { - public static final int MEMORY_USAGE_COMPARE_MAX = 10; public static final String MEMORY_PIVOT_KEY = "org.mobilitydata.gtfsvalidator.table.GtfsFeedLoader.loadAndValidate"; private final Map referenceTimes; @@ -266,67 +264,6 @@ private void generatePerformanceMetricsLog( } } - private void addMemoryUsageReport( - List memoryUsages, - String order, - StringBuilder b, - boolean includeDifference) { - b.append(String.format("

List of %s datasets(%s).

", MEMORY_USAGE_COMPARE_MAX, order)) - .append("\n\n") - .append( - "| Dataset ID | Snapshot Key(Used Memory) | Reference | Latest |"); - if (includeDifference) { - b.append(" Difference |"); - } - b.append("\n"); - b.append( - "|-----------------------------|-------------------|----------------|----------------|"); - if (includeDifference) { - b.append("----------------|"); - } - b.append("\n"); - memoryUsages.stream() - .forEachOrdered( - datasetMemoryUsage -> { - generateMemoryLogByKey(datasetMemoryUsage, b, includeDifference); - }); - } - - private static void generateMemoryLogByKey( - DatasetMemoryUsage datasetMemoryUsage, StringBuilder b, boolean includeDifference) { - AtomicBoolean isFirst = new AtomicBoolean(true); - Set keys = new HashSet<>(); - keys.addAll(datasetMemoryUsage.getReferenceUsedMemoryByKey().keySet()); - keys.addAll(datasetMemoryUsage.getLatestUsedMemoryByKey().keySet()); - keys.stream() - .forEach( - key -> { - var reference = datasetMemoryUsage.getReferenceUsedMemoryByKey().get(key); - var latest = datasetMemoryUsage.getLatestUsedMemoryByKey().get(key); - if (isFirst.get()) { - b.append(String.format("| %s | | | |", datasetMemoryUsage.getDatasetId())); - if (includeDifference) { - b.append(" |"); - } - b.append("\n"); - isFirst.set(false); - } - String usedMemoryDiff = getMemoryDiff(reference, latest); - b.append( - String.format( - "| | %s | %s | %s |", - key, - reference != null - ? MemoryUsage.convertToHumanReadableMemory(reference) - : "N/A", - latest != null ? MemoryUsage.convertToHumanReadableMemory(latest) : "N/A")); - if (includeDifference) { - b.append(String.format(" %s |", usedMemoryDiff)); - } - b.append("\n"); - }); - } - public void compareValidationReports( String sourceId, ValidationReport referenceReport, ValidationReport latestReport) { if (referenceReport.getValidationTimeSeconds() != null) {