From b4c755f9a3954753cd3748f7064f671f54c7da6c Mon Sep 17 00:00:00 2001 From: David Venable Date: Tue, 11 Jun 2024 18:40:15 -0500 Subject: [PATCH] Adds and uses a Caffeine-based caching parser for the user_agent processor. Resolves #4618 Signed-off-by: David Venable --- data-prepper-plugins/opensearch/build.gradle | 2 +- .../otel-trace-raw-processor/build.gradle | 2 +- .../user-agent-processor/build.gradle | 3 +- .../useragent/CaffeineCachingParser.java | 74 ++++++++ .../useragent/UserAgentProcessor.java | 3 +- .../useragent/CaffeineCachingParserTest.java | 158 ++++++++++++++++++ settings.gradle | 1 + 7 files changed, 238 insertions(+), 5 deletions(-) create mode 100644 data-prepper-plugins/user-agent-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/useragent/CaffeineCachingParser.java create mode 100644 data-prepper-plugins/user-agent-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/useragent/CaffeineCachingParserTest.java diff --git a/data-prepper-plugins/opensearch/build.gradle b/data-prepper-plugins/opensearch/build.gradle index 1d5be32d00..1f96c1f6ea 100644 --- a/data-prepper-plugins/opensearch/build.gradle +++ b/data-prepper-plugins/opensearch/build.gradle @@ -32,7 +32,7 @@ dependencies { implementation 'software.amazon.awssdk:s3' implementation 'software.amazon.awssdk:opensearchserverless' implementation libs.commons.lang3 - implementation 'com.github.ben-manes.caffeine:caffeine:3.1.8' + implementation libs.caffeine implementation 'software.amazon.awssdk:apache-client' implementation 'software.amazon.awssdk:netty-nio-client' implementation 'co.elastic.clients:elasticsearch-java:7.17.0' diff --git a/data-prepper-plugins/otel-trace-raw-processor/build.gradle b/data-prepper-plugins/otel-trace-raw-processor/build.gradle index 6d9994abbb..ff2bfc4a60 100644 --- a/data-prepper-plugins/otel-trace-raw-processor/build.gradle +++ b/data-prepper-plugins/otel-trace-raw-processor/build.gradle @@ -18,7 +18,7 @@ dependencies { 
implementation libs.armeria.grpc implementation 'com.fasterxml.jackson.core:jackson-databind' implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml' - implementation 'com.github.ben-manes.caffeine:caffeine:3.1.8' + implementation libs.caffeine testImplementation 'org.assertj:assertj-core:3.25.3' testImplementation testLibs.mockito.inline } diff --git a/data-prepper-plugins/user-agent-processor/build.gradle b/data-prepper-plugins/user-agent-processor/build.gradle index 6ad33c84ba..746ee40397 100644 --- a/data-prepper-plugins/user-agent-processor/build.gradle +++ b/data-prepper-plugins/user-agent-processor/build.gradle @@ -11,7 +11,8 @@ dependencies { implementation project(':data-prepper-api') implementation project(':data-prepper-plugins:common') implementation 'com.fasterxml.jackson.core:jackson-databind' - implementation "com.github.ua-parser:uap-java:1.6.1" + implementation 'com.github.ua-parser:uap-java:1.6.1' + implementation libs.caffeine } jacocoTestCoverageVerification { diff --git a/data-prepper-plugins/user-agent-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/useragent/CaffeineCachingParser.java b/data-prepper-plugins/user-agent-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/useragent/CaffeineCachingParser.java new file mode 100644 index 0000000000..3b72f49ce3 --- /dev/null +++ b/data-prepper-plugins/user-agent-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/useragent/CaffeineCachingParser.java @@ -0,0 +1,74 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.dataprepper.plugins.processor.useragent; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import ua_parser.Client; +import ua_parser.Device; +import ua_parser.OS; +import ua_parser.Parser; +import ua_parser.UserAgent; + +import java.util.function.Function; + +/** + * A subclass of {@link Parser} 
which uses Caffeine as a cache. + */ +class CaffeineCachingParser extends Parser { + private final Cache clientCache; + private final Cache userAgentCache; + private final Cache deviceCache; + private final Cache osCache; + + /** + * Constructs a new instance with a given cache size. + * + * @param cacheSize The size of the cache as a count of items. + */ + CaffeineCachingParser(final long cacheSize) { + userAgentCache = createCache(cacheSize); + clientCache = createCache(cacheSize); + deviceCache = createCache(cacheSize); + osCache = createCache(cacheSize); + } + + @Override + public Client parse(final String agentString) { + return parseCaching(agentString, clientCache, super::parse); + } + + @Override + public UserAgent parseUserAgent(final String agentString) { + return parseCaching(agentString, userAgentCache, super::parseUserAgent); + } + + @Override + public Device parseDevice(final String agentString) { + return parseCaching(agentString, deviceCache, super::parseDevice); + } + + @Override + public OS parseOS(final String agentString) { + return parseCaching(agentString, osCache, super::parseOS); + } + + private T parseCaching( + final String agentString, + final Cache cache, + final Function parseFunction) { + if (agentString == null) { + return null; + } + return cache.get(agentString, parseFunction); + } + + private static Cache createCache(final long maximumSize) { + return Caffeine.newBuilder() + .maximumSize(maximumSize) + .build(); + } +} diff --git a/data-prepper-plugins/user-agent-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/useragent/UserAgentProcessor.java b/data-prepper-plugins/user-agent-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/useragent/UserAgentProcessor.java index 220bb88287..32779655dc 100644 --- a/data-prepper-plugins/user-agent-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/useragent/UserAgentProcessor.java +++ 
b/data-prepper-plugins/user-agent-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/useragent/UserAgentProcessor.java @@ -14,7 +14,6 @@ import org.opensearch.dataprepper.model.record.Record; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import ua_parser.CachingParser; import ua_parser.Client; import ua_parser.Parser; @@ -36,7 +35,7 @@ public class UserAgentProcessor extends AbstractProcessor, Record< public UserAgentProcessor(final PluginMetrics pluginMetrics, final UserAgentProcessorConfig config) { super(pluginMetrics); this.config = config; - this.userAgentParser = new CachingParser(config.getCacheSize()); + this.userAgentParser = new CaffeineCachingParser(config.getCacheSize()); } @Override diff --git a/data-prepper-plugins/user-agent-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/useragent/CaffeineCachingParserTest.java b/data-prepper-plugins/user-agent-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/useragent/CaffeineCachingParserTest.java new file mode 100644 index 0000000000..e295aed702 --- /dev/null +++ b/data-prepper-plugins/user-agent-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/useragent/CaffeineCachingParserTest.java @@ -0,0 +1,158 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.dataprepper.plugins.processor.useragent; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import ua_parser.Client; +import ua_parser.Device; +import ua_parser.OS; +import ua_parser.UserAgent; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.CoreMatchers.sameInstance; +import static org.hamcrest.MatcherAssert.assertThat; + +@SuppressWarnings("StringOperationCanBeSimplified") +class CaffeineCachingParserTest { + private static final String 
KNOWN_USER_AGENT_STRING = "Mozilla/5.0 (iPhone; CPU iPhone OS 13_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Mobile/15E148 Safari/604.1"; + long cacheSize; + + @BeforeEach + void setUp() { + cacheSize = 1000; + } + + private CaffeineCachingParser createObjectUnderTest() { + return new CaffeineCachingParser(cacheSize); + } + + @Test + void parse_returns_expected_results() { + final Client client = createObjectUnderTest().parse(KNOWN_USER_AGENT_STRING); + + assertThat(client, notNullValue()); + assertThat(client.userAgent, notNullValue()); + assertThat(client.userAgent.family, equalTo("Mobile Safari")); + assertThat(client.userAgent.major, equalTo("13")); + assertThat(client.device.family, equalTo("iPhone")); + assertThat(client.os.family, equalTo("iOS")); + } + + @Test + void parse_with_null_returns_null() { + assertThat(createObjectUnderTest().parse(null), + nullValue()); + } + + @Test + void parse_called_multiple_times_returns_same_instance() { + final CaffeineCachingParser objectUnderTest = createObjectUnderTest(); + + final String userAgentString = KNOWN_USER_AGENT_STRING; + final Client client = objectUnderTest.parse(userAgentString); + + assertThat(client, notNullValue()); + + assertThat(objectUnderTest.parse(new String(userAgentString)), sameInstance(client)); + assertThat(objectUnderTest.parse(new String(userAgentString)), sameInstance(client)); + assertThat(objectUnderTest.parse(new String(userAgentString)), sameInstance(client)); + } + + @Test + void parseUserAgent_returns_expected_results() { + final CaffeineCachingParser objectUnderTest = createObjectUnderTest(); + + final UserAgent userAgent = objectUnderTest.parseUserAgent(KNOWN_USER_AGENT_STRING); + + assertThat(userAgent, notNullValue()); + assertThat(userAgent.family, equalTo("Mobile Safari")); + assertThat(userAgent.major, equalTo("13")); + } + + @Test + void parseUserAgent_with_null_returns_null() { + assertThat(createObjectUnderTest().parseUserAgent(null), + 
nullValue()); + } + + @Test + void parseUserAgent_called_multiple_times_returns_same_instance() { + final CaffeineCachingParser objectUnderTest = createObjectUnderTest(); + + final String userAgentString = KNOWN_USER_AGENT_STRING; + final UserAgent userAgent = objectUnderTest.parseUserAgent(userAgentString); + + assertThat(userAgent, notNullValue()); + + assertThat(objectUnderTest.parseUserAgent(new String(userAgentString)), sameInstance(userAgent)); + assertThat(objectUnderTest.parseUserAgent(new String(userAgentString)), sameInstance(userAgent)); + assertThat(objectUnderTest.parseUserAgent(new String(userAgentString)), sameInstance(userAgent)); + } + + @Test + void parseDevice_returns_expected_results() { + final CaffeineCachingParser objectUnderTest = createObjectUnderTest(); + + final Device device = objectUnderTest.parseDevice(KNOWN_USER_AGENT_STRING); + + assertThat(device, notNullValue()); + assertThat(device.family, equalTo("iPhone")); + } + + @Test + void parseDevice_with_null_returns_null() { + assertThat(createObjectUnderTest().parseDevice(null), + nullValue()); + } + + @Test + void parseDevice_called_multiple_times_returns_same_instance() { + final CaffeineCachingParser objectUnderTest = createObjectUnderTest(); + + final String userAgentString = KNOWN_USER_AGENT_STRING; + final Device device = objectUnderTest.parseDevice(userAgentString); + + assertThat(device, notNullValue()); + + assertThat(objectUnderTest.parseDevice(new String(userAgentString)), sameInstance(device)); + assertThat(objectUnderTest.parseDevice(new String(userAgentString)), sameInstance(device)); + assertThat(objectUnderTest.parseDevice(new String(userAgentString)), sameInstance(device)); + } + + @Test + void parseOS_returns_expected_results() { + final CaffeineCachingParser objectUnderTest = createObjectUnderTest(); + + final OS os = objectUnderTest.parseOS(KNOWN_USER_AGENT_STRING); + + assertThat(os, notNullValue()); + assertThat(os.family, equalTo("iOS")); + assertThat(os.major, 
equalTo("13")); + } + + @Test + void parseOS_with_null_returns_null() { + assertThat(createObjectUnderTest().parseOS(null), + nullValue()); + } + + @Test + void parseOS_called_multiple_times_returns_same_instance() { + final CaffeineCachingParser objectUnderTest = createObjectUnderTest(); + + final String userAgentString = KNOWN_USER_AGENT_STRING; + final OS os = objectUnderTest.parseOS(userAgentString); + + assertThat(os, notNullValue()); + + assertThat(objectUnderTest.parseOS(new String(userAgentString)), sameInstance(os)); + assertThat(objectUnderTest.parseOS(new String(userAgentString)), sameInstance(os)); + assertThat(objectUnderTest.parseOS(new String(userAgentString)), sameInstance(os)); + } +} \ No newline at end of file diff --git a/settings.gradle b/settings.gradle index 0a7718aa4d..c72986922e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -65,6 +65,7 @@ dependencyResolutionManagement { library('hadoop-mapreduce', 'org.apache.hadoop', 'hadoop-mapreduce-client-core').versionRef('hadoop') version('avro', '1.11.3') library('avro-core', 'org.apache.avro', 'avro').versionRef('avro') + library('caffeine', 'com.github.ben-manes.caffeine', 'caffeine').version('3.1.8') } testLibs { version('junit', '5.8.2')