From 2aa48b01d3286b24536762107e9d344810dcbb68 Mon Sep 17 00:00:00 2001 From: Bernd Ahlers Date: Tue, 26 Mar 2019 15:06:48 +0100 Subject: [PATCH] Switch back to a repackaged and fixed version of java-grok (#5800) * Switch back to a repackaged and fixed version of java-grok To support underscores ("_") in Grok match group names, we had to modify the java-grok library to use the old regexp engine again. See: https://github.com/graylog-labs/java-grok/pull/2 This also adds a test for the Grok extractor to make sure that using underscores works. Fixes #5704 Fixes #5563 * Fix GrokPatternService#extractPatternNames and add a test for it * Add missing license header to GrokPatternServiceTest * Add test for named group with underscore Prior to this change, there was no test for named groups with underscores in the FunctionSnippetsTest This change enhances the grok() test to run with a named group with underscore. (cherry picked from commit e642a418a09909b0858b3ba5ba837a8e441a1944) --- graylog-project-parent/pom.xml | 4 +-- graylog2-server/pom.xml | 4 +-- .../org/graylog2/grok/GrokPatternService.java | 19 +++++++++-- .../functions/FunctionsSnippetsTest.java | 7 +++- .../graylog2/grok/GrokPatternServiceTest.java | 32 +++++++++++++++++++ .../inputs/extractors/GrokExtractorTest.java | 19 +++++++++++ .../pipelineprocessor/functions/grok.txt | 4 +++ pom.xml | 2 +- 8 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 graylog2-server/src/test/java/org/graylog2/grok/GrokPatternServiceTest.java diff --git a/graylog-project-parent/pom.xml b/graylog-project-parent/pom.xml index 4ffeeee1c91b..52e7b8b4fb37 100644 --- a/graylog-project-parent/pom.xml +++ b/graylog-project-parent/pom.xml @@ -447,8 +447,8 @@ - io.krakens - java-grok + org.graylog2.repackaged + grok ${grok.version} diff --git a/graylog2-server/pom.xml b/graylog2-server/pom.xml index fb73dfeaf5be..56eae39d04a9 100644 --- a/graylog2-server/pom.xml +++ b/graylog2-server/pom.xml @@ -304,8 +304,8 @@ - io.krakens - java-grok + org.graylog2.repackaged + grok diff --git a/graylog2-server/src/main/java/org/graylog2/grok/GrokPatternService.java b/graylog2-server/src/main/java/org/graylog2/grok/GrokPatternService.java index ef99cf275daf..0d98ca609a21 100644 --- a/graylog2-server/src/main/java/org/graylog2/grok/GrokPatternService.java +++ b/graylog2-server/src/main/java/org/graylog2/grok/GrokPatternService.java @@ -23,11 +23,13 @@ import java.util.Collection; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.regex.Matcher; +import java.util.regex.Pattern; public interface GrokPatternService { GrokPattern load(String patternId) throws NotFoundException; @@ -56,13 +58,24 @@ public interface GrokPatternService { static Set extractPatternNames(String namedPattern) { final Set result = new HashSet<>(); - final Set namedGroups = GrokUtils.getNameGroups(GrokUtils.GROK_PATTERN.pattern()); - final Matcher matcher = GrokUtils.GROK_PATTERN.matcher(namedPattern); + // We have to use java.util.Regex here to get the names because ".find()" on the "com.google.code.regexp.Matcher" + // would run in an endless loop. + final Set namedGroups = GrokUtils.getNameGroups(GrokUtils.GROK_PATTERN.namedPattern()); + final Matcher matcher = Pattern.compile(GrokUtils.GROK_PATTERN.namedPattern()).matcher(namedPattern); while (matcher.find()) { - final Map group = GrokUtils.namedGroups(matcher, namedGroups); + final Map group = namedGroups(matcher, namedGroups); final String patternName = group.get("pattern"); result.add(patternName); } return result; } + + static Map namedGroups(Matcher matcher, Set groupNames) { + Map namedGroups = new LinkedHashMap<>(); + for (String groupName : groupNames) { + String groupValue = matcher.group(groupName); + namedGroups.put(groupName, groupValue); + } + return namedGroups; + } } diff --git a/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java b/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java index a3acac50dae2..1f619072fc51 100644 --- a/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java +++ b/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java @@ -305,6 +305,7 @@ public static void registerFunctions() { GrokPattern.create("GREEDY", ".*"), GrokPattern.create("BASE10NUM", "(?[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))"), GrokPattern.create("NUMBER", "(?:%{BASE10NUM:UNWANTED})"), + GrokPattern.create("UNDERSCORE", "(?test)"), GrokPattern.create("NUM", "%{BASE10NUM}") ); when(grokPatternService.loadAll()).thenReturn(patterns); @@ -630,11 +631,15 @@ public void grok() { final Message message = evaluateRule(rule); assertThat(message).isNotNull(); - assertThat(message.getFieldCount()).isEqualTo(5); + assertThat(message.getFieldCount()).isEqualTo(6); assertThat(message.getTimestamp()).isEqualTo(DateTime.parse("2015-07-31T10:05:36.773Z")); // named captures only assertThat(message.hasField("num")).isTrue(); assertThat(message.hasField("BASE10NUM")).isFalse(); + + // Test for issue 5563 and 5794 + // ensure named groups with underscore work + assertThat(message.hasField("test_field")).isTrue(); } @Test diff --git a/graylog2-server/src/test/java/org/graylog2/grok/GrokPatternServiceTest.java b/graylog2-server/src/test/java/org/graylog2/grok/GrokPatternServiceTest.java new file mode 100644 index 000000000000..da618620c9de --- /dev/null +++ b/graylog2-server/src/test/java/org/graylog2/grok/GrokPatternServiceTest.java @@ -0,0 +1,32 @@ +/** + * This file is part of Graylog. + * + * Graylog is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Graylog is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Graylog. If not, see . + */ +package org.graylog2.grok; + +import org.junit.Test; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +public class GrokPatternServiceTest { + @Test + public void extractPatternNames() { + final Set names = GrokPatternService.extractPatternNames("%{EMAILLOCALPART}@%{HOSTNAME}"); + + assertThat(names).containsOnly("HOSTNAME", "EMAILLOCALPART"); + } +} \ No newline at end of file diff --git a/graylog2-server/src/test/java/org/graylog2/inputs/extractors/GrokExtractorTest.java b/graylog2-server/src/test/java/org/graylog2/inputs/extractors/GrokExtractorTest.java index ce4183db3b5b..bc9e056c8878 100644 --- a/graylog2-server/src/test/java/org/graylog2/inputs/extractors/GrokExtractorTest.java +++ b/graylog2-server/src/test/java/org/graylog2/inputs/extractors/GrokExtractorTest.java @@ -257,6 +257,25 @@ public void testIssue4773() throws Exception { ); } + @Test + public void testIssue5563() { + // See: https://github.com/Graylog2/graylog2-server/issues/5563 + // https://github.com/Graylog2/graylog2-server/issues/5704 + final Map config = new HashMap<>(); + + config.put("named_captures_only", true); + + patternSet.add(GrokPattern.create("YOLO", "(?test)")); + // Make sure that the user can use a capture name with an "_". + final GrokExtractor extractor = makeExtractor("%{YOLO}", config); + + assertThat(extractor.run("test")) + .hasSize(1) + .containsOnly( + new Extractor.Result("test", "test_field", -1, -1) + ); + } + private GrokExtractor makeExtractor(String pattern) { return makeExtractor(pattern, new HashMap<>()); } diff --git a/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/grok.txt b/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/grok.txt index 49e57025f301..9bfd4a12d05d 100644 --- a/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/grok.txt +++ b/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/grok.txt @@ -7,4 +7,8 @@ then // only named captures let matches1 = grok("%{NUM:num}", "10", true); set_fields(matches1); + + //test for underscore + let matches2 = grok("%{UNDERSCORE}", "test", true); + set_fields(matches2); end diff --git a/pom.xml b/pom.xml index 6a047d744e6e..817856ead4ac 100644 --- a/pom.xml +++ b/pom.xml @@ -100,7 +100,7 @@ 2.4.15+jackson 1.4.4 2.12.0 - 0.1.9 + 0.1.9-graylog-1 2.0.0 25.1-jre 4.2.0