Skip to content

Commit

Permalink
Switch back to a repackaged and fixed version of java-grok (#5800)
Browse files Browse the repository at this point in the history
* Switch back to a repackaged and fixed version of java-grok

To support underscores ("_") in Grok match group names, we had to modify
the java-grok library to use the old regexp engine again.

See: graylog-labs/java-grok#2

This also adds a test for the Grok extractor to make sure that using
underscores works.

Fixes #5704
Fixes #5563

* Fix GrokPatternService#extractPatternNames and add a test for it

* Add missing license header to GrokPatternServiceTest

* Add test for named group with underscore

Prior to this change, there was no test for named groups
with underscores in the FunctionSnippetsTest.

This change enhances the grok() test to run with a
named group that contains an underscore.
  • Loading branch information
bernd authored and kmerz committed Mar 26, 2019
1 parent 0e013b8 commit e642a41
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 9 deletions.
4 changes: 2 additions & 2 deletions graylog-project-parent/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,8 @@
</exclusions>
</dependency>
<dependency>
<groupId>io.krakens</groupId>
<artifactId>java-grok</artifactId>
<groupId>org.graylog2.repackaged</groupId>
<artifactId>grok</artifactId>
<version>${grok.version}</version>
</dependency>
<dependency>
Expand Down
4 changes: 2 additions & 2 deletions graylog2-server/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,8 @@
</dependency>

<dependency>
<groupId>io.krakens</groupId>
<artifactId>java-grok</artifactId>
<groupId>org.graylog2.repackaged</groupId>
<artifactId>grok</artifactId>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@

import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public interface GrokPatternService {
GrokPattern load(String patternId) throws NotFoundException;
Expand Down Expand Up @@ -56,13 +58,24 @@ public interface GrokPatternService {

/**
 * Collects the names of the Grok patterns referenced in the given pattern string.
 * <p>
 * Every match of {@code GrokUtils.GROK_PATTERN} found in the input contributes the value of
 * its {@code "pattern"} named group to the result, e.g. {@code "%{EMAILLOCALPART}@%{HOSTNAME}"}
 * is expected to yield {@code EMAILLOCALPART} and {@code HOSTNAME}.
 *
 * @param namedPattern the pattern string to scan for pattern references
 * @return the values captured by the {@code "pattern"} group; repeated names collapse into one entry
 */
// NOTE(review): this listing contains duplicate declarations of "namedGroups", "matcher" and
// "group" — it appears to show both the pre-change and post-change lines of the diff. Confirm
// which lines are current before relying on it.
static Set<String> extractPatternNames(String namedPattern) {
final Set<String> result = new HashSet<>();
final Set<String> namedGroups = GrokUtils.getNameGroups(GrokUtils.GROK_PATTERN.pattern());
final Matcher matcher = GrokUtils.GROK_PATTERN.matcher(namedPattern);
// We have to use java.util.regex here to get the names because ".find()" on the
// "com.google.code.regexp.Matcher" would run in an endless loop.
final Set<String> namedGroups = GrokUtils.getNameGroups(GrokUtils.GROK_PATTERN.namedPattern());
final Matcher matcher = Pattern.compile(GrokUtils.GROK_PATTERN.namedPattern()).matcher(namedPattern);
// Each successful find() corresponds to one pattern reference in the input.
while (matcher.find()) {
final Map<String, String> group = GrokUtils.namedGroups(matcher, namedGroups);
final Map<String, String> group = namedGroups(matcher, namedGroups);
// Only the "pattern" group is of interest here; the other named groups are ignored.
final String patternName = group.get("pattern");
result.add(patternName);
}
return result;
}

/**
 * Reads the current match of {@code matcher} and maps every requested group name to the
 * text captured by that group.
 *
 * @param matcher    a matcher positioned on a successful match
 * @param groupNames the names of the groups to look up
 * @return a map from group name to captured value, in the iteration order of {@code groupNames};
 *         a group that did not participate in the match is mapped to {@code null}
 */
static Map<String, String> namedGroups(Matcher matcher, Set<String> groupNames) {
    final Map<String, String> valuesByName = new LinkedHashMap<>();
    // put() (rather than a collector) keeps null values for groups that matched nothing.
    groupNames.forEach(name -> valuesByName.put(name, matcher.group(name)));
    return valuesByName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ public static void registerFunctions() {
GrokPattern.create("GREEDY", ".*"),
GrokPattern.create("BASE10NUM", "(?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))"),
GrokPattern.create("NUMBER", "(?:%{BASE10NUM:UNWANTED})"),
GrokPattern.create("UNDERSCORE", "(?<test_field>test)"),
GrokPattern.create("NUM", "%{BASE10NUM}")
);
when(grokPatternService.loadAll()).thenReturn(patterns);
Expand Down Expand Up @@ -630,11 +631,15 @@ public void grok() {
final Message message = evaluateRule(rule);

assertThat(message).isNotNull();
assertThat(message.getFieldCount()).isEqualTo(5);
assertThat(message.getFieldCount()).isEqualTo(6);
assertThat(message.getTimestamp()).isEqualTo(DateTime.parse("2015-07-31T10:05:36.773Z"));
// named captures only
assertThat(message.hasField("num")).isTrue();
assertThat(message.hasField("BASE10NUM")).isFalse();

// Test for issue 5563 and 5794
// ensure named groups with underscore work
assertThat(message.hasField("test_field")).isTrue();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* This file is part of Graylog.
*
* Graylog is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Graylog is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Graylog. If not, see <http://www.gnu.org/licenses/>.
*/
package org.graylog2.grok;

import org.junit.Test;

import java.util.Set;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for the static helpers exposed by {@link GrokPatternService}.
 */
public class GrokPatternServiceTest {
    /**
     * A pattern string referencing two Grok patterns must yield exactly those two names.
     */
    @Test
    public void extractPatternNames() {
        final String pattern = "%{EMAILLOCALPART}@%{HOSTNAME}";

        final Set<String> extracted = GrokPatternService.extractPatternNames(pattern);

        assertThat(extracted).containsOnly("EMAILLOCALPART", "HOSTNAME");
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,25 @@ public void testIssue4773() throws Exception {
);
}

@Test
public void testIssue5563() {
    // Regression test for capture names containing an underscore ("_").
    // See: https://github.com/Graylog2/graylog2-server/issues/5563
    //      https://github.com/Graylog2/graylog2-server/issues/5704
    patternSet.add(GrokPattern.create("YOLO", "(?<test_field>test)"));

    final Map<String, Object> config = new HashMap<>();
    config.put("named_captures_only", true);

    final GrokExtractor extractor = makeExtractor("%{YOLO}", config);
    final Extractor.Result expected = new Extractor.Result("test", "test_field", -1, -1);

    // The only extracted result must be the field named by the underscore capture.
    assertThat(extractor.run("test"))
            .hasSize(1)
            .containsOnly(expected);
}

/**
 * Convenience overload: builds an extractor for {@code pattern} with a fresh, empty configuration map.
 */
private GrokExtractor makeExtractor(String pattern) {
    final Map<String, Object> emptyConfig = new HashMap<>();
    return makeExtractor(pattern, emptyConfig);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,8 @@ then
// only named captures
let matches1 = grok("%{NUM:num}", "10", true);
set_fields(matches1);

//test for underscore
let matches2 = grok("%{UNDERSCORE}", "test", true);
set_fields(matches2);
end
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
<jest.version>2.4.15+jackson</jest.version>
<gelfclient.version>1.4.4</gelfclient.version>
<geoip2.version>2.12.0</geoip2.version>
<grok.version>0.1.9</grok.version>
<grok.version>0.1.9-graylog-1</grok.version>
<guava-retrying.version>2.0.0</guava-retrying.version>
<guava.version>25.1-jre</guava.version>
<guice.version>4.2.0</guice.version>
Expand Down

0 comments on commit e642a41

Please sign in to comment.