From db508600438e7e75e5ff9e8d337cbf4b3c59e381 Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Mon, 31 Jul 2023 23:37:16 +0000 Subject: [PATCH 01/13] readme and config Signed-off-by: Kat Shen --- data-prepper-plugins/key-value-processor/README.md | 4 ++++ .../processor/keyvalue/KeyValueProcessorConfig.java | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/data-prepper-plugins/key-value-processor/README.md b/data-prepper-plugins/key-value-processor/README.md index 02e77a76c6..bc9675f364 100644 --- a/data-prepper-plugins/key-value-processor/README.md +++ b/data-prepper-plugins/key-value-processor/README.md @@ -82,6 +82,10 @@ When run, the processor will parse the message into the following output: * Example: `remove_brackets` is `true`. `{"key1=(value1)"}` will parse into `{"key1": value1}` * Example: `remove_brackets` is `false`. `{"key1=(value1)"}` will parse into `{"key1": "(value1)"}` * In the case of a key-value pair with a brackets and a split character, the splitting will take priority over `remove_brackets=true`. `{key1=(value1&value2)}` will parse into `{"key1":"value1","value2)":null}` +* `recursive` - Specify whether to drill down into values and recursively get more key-value pairs from it. The extra key-value pairs will be stored as subkeys of the root key. + * Default: `false` + * Example: `recursive` is true. `{item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value}` will parse into `"item1": {"item1-subitem1": "item1-subitem1-value", "item1-subitem2": {"item1-subitem2-subitem2A": "item1-subitem2-subitem2A-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value"}}` + * Example: `recursive` is false. `{item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value}` will parse into `"item1-subitem2": "(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value", "item2": "item2-value","item1": "[item1-subitem1=item1-subitem1-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value)]"` ## Developer Guide This plugin is compatible with Java 14. See diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorConfig.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorConfig.java index 57af39de9b..691de84b37 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorConfig.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorConfig.java @@ -27,6 +27,7 @@ public class KeyValueProcessorConfig { static final String DEFAULT_WHITESPACE = "lenient"; static final boolean DEFAULT_SKIP_DUPLICATE_VALUES = false; static final boolean DEFAULT_REMOVE_BRACKETS = false; + static final boolean DEFAULT_RECURSIVE = false; @NotEmpty private String source = DEFAULT_SOURCE; @@ -85,6 +86,10 @@ public class KeyValueProcessorConfig { @NotNull private boolean removeBrackets = DEFAULT_REMOVE_BRACKETS; + @JsonProperty("recursive") + @NotNull + private boolean recursive = DEFAULT_RECURSIVE; + public String getSource() { return source; } @@ -148,4 +153,8 @@ public boolean getSkipDuplicateValues() { public boolean getRemoveBrackets() { return removeBrackets; } + + public boolean getRecursive() { + return recursive; + } } From 822570da224a5bce09204379412e89c2104f41a7 Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Mon, 7 Aug 2023 18:27:12 +0000 Subject: [PATCH 02/13] clarify readme Signed-off-by: Kat Shen --- data-prepper-plugins/key-value-processor/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/data-prepper-plugins/key-value-processor/README.md b/data-prepper-plugins/key-value-processor/README.md index bc9675f364..55de5e952b 100644 --- a/data-prepper-plugins/key-value-processor/README.md +++ b/data-prepper-plugins/key-value-processor/README.md @@ -84,6 +84,7 @@ When run, the processor will parse the message into the following output: * In the case of a key-value pair with a brackets and a split character, the splitting will take priority over `remove_brackets=true`. `{key1=(value1&value2)}` will parse into `{"key1":"value1","value2)":null}` * `recursive` - Specify whether to drill down into values and recursively get more key-value pairs from it. The extra key-value pairs will be stored as subkeys of the root key. * Default: `false` + * The levels of recursive parsing must be defined by different brackets for each level: `[]`, `()`, and `<>` in this order. * Example: `recursive` is true. `{item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value}` will parse into `"item1": {"item1-subitem1": "item1-subitem1-value", "item1-subitem2": {"item1-subitem2-subitem2A": "item1-subitem2-subitem2A-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value"}}` * Example: `recursive` is false. `{item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value}` will parse into `"item1-subitem2": "(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value", "item2": "item2-value","item1": "[item1-subitem1=item1-subitem1-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value)]"` From 47c9aa388c96847d73f63fd508f561622ac7de78 Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Mon, 14 Aug 2023 23:01:20 +0000 Subject: [PATCH 03/13] working on recursive implementation, resolving issues Signed-off-by: Kat Shen --- .../key-value-processor/README.md | 7 +-- .../processor/keyvalue/KeyValueProcessor.java | 49 +++++++++++++++++++ 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/README.md b/data-prepper-plugins/key-value-processor/README.md index bf1bc7df53..0a780fe1f5 100644 --- a/data-prepper-plugins/key-value-processor/README.md +++ b/data-prepper-plugins/key-value-processor/README.md @@ -88,12 +88,13 @@ When run, the processor will parse the message into the following output: * Default: `false` * Example: `remove_brackets` is `true`. `{"key1=(value1)"}` will parse into `{"key1": value1}` * Example: `remove_brackets` is `false`. `{"key1=(value1)"}` will parse into `{"key1": "(value1)"}` - * In the case of a key-value pair with a brackets and a split character, the splitting will take priority over `remove_brackets=true`. `{key1=(value1&value2)}` will parse into `{"key1":"value1","value2)":null}` + * In the case of a key-value pair with a brackets and a split character, the splitting will take priority over `remove_brackets=true`. `{"key1=(value1&value2)"}` will parse into `{"key1":"value1","value2)":null}` * `recursive` - Specify whether to drill down into values and recursively get more key-value pairs from it. The extra key-value pairs will be stored as subkeys of the root key. * Default: `false` * The levels of recursive parsing must be defined by different brackets for each level: `[]`, `()`, and `<>` in this order. - * Example: `recursive` is true. `{item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value}` will parse into `"item1": {"item1-subitem1": "item1-subitem1-value", "item1-subitem2": {"item1-subitem2-subitem2A": "item1-subitem2-subitem2A-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value"}}` - * Example: `recursive` is false. `{item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value}` will parse into `"item1-subitem2": "(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value", "item2": "item2-value","item1": "[item1-subitem1=item1-subitem1-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value)]"` + * Example: `recursive` is true. `{"item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value"}` will parse into `"item1": {"item1-subitem1": "item1-subitem1-value", "item1-subitem2": {"item1-subitem2-subitem2A": "item1-subitem2-subitem2A-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value"}}` + * Example: `recursive` is false. `{"item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value"}` will parse into `"item1-subitem2": "(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value", "item2": "item2-value","item1": "[item1-subitem1=item1-subitem1-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value)]"` + * While `recursive` is `true`, `remove_brackets` cannot also be `true`. ## Developer Guide This plugin is compatible with Java 14. See diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index bca9b11540..36a169ec7b 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -11,10 +11,13 @@ import org.opensearch.dataprepper.model.event.Event; import org.opensearch.dataprepper.model.processor.AbstractProcessor; import org.opensearch.dataprepper.model.processor.Processor; +import org.opensearch.dataprepper.model.event.JacksonEvent; import org.opensearch.dataprepper.model.record.Record; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -122,6 +125,7 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces final Pattern boolCheck = Pattern.compile("true|false", Pattern.CASE_INSENSITIVE); final Matcher duplicateValueBoolMatch = boolCheck.matcher(String.valueOf(keyValueProcessorConfig.getSkipDuplicateValues())); final Matcher removeBracketsBoolMatch = boolCheck.matcher(String.valueOf(keyValueProcessorConfig.getRemoveBrackets())); + final Matcher recursiveBoolMatch = boolCheck.matcher(String.valueOf(keyValueProcessorConfig.getRecursive())); if (!duplicateValueBoolMatch.matches()) { throw new IllegalArgumentException(String.format("The skip_duplicate_values value must be either true or false", keyValueProcessorConfig.getSkipDuplicateValues())); @@ -130,6 +134,16 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces if (!removeBracketsBoolMatch.matches()) { throw new IllegalArgumentException(String.format("The remove_brackets value must be either true or false", keyValueProcessorConfig.getRemoveBrackets())); } + + if (!recursiveBoolMatch.matches()) { + throw new IllegalArgumentException(String.format("The recursive value must be either true or false", keyValueProcessorConfig.getRemoveBrackets())); + } + + if (keyValueProcessorConfig.getRemoveBrackets() && keyValueProcessorConfig.getRecursive()) { + throw new IllegalArgumentException("Cannot remove brackets needed for determining levels of recursion"); + } + + initRecursiveMap(recursiveBracketMap); } private String buildRegexFromCharacters(String s) { @@ -183,6 +197,12 @@ public Collection> doExecute(final Collection> recor final Event recordEvent = record.getData(); final String groupsRaw = recordEvent.get(keyValueProcessorConfig.getSource(), String.class); + + if (keyValueProcessorConfig.getRecursive()) { + ObjectMapper mapper = new ObjectMapper(); + JsonNode result = recurse(groupsRaw); + } + final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); for(final String group : groups) { @@ -254,6 +274,35 @@ public Collection> doExecute(final Collection> recor return records; } + private static JsonNode recurse(String rawStringInput, ObjectMapper mapper) { + ObjectNode root = mapper.createObjectNode(); + String[] pairs = rawStringInput.split("&"); // KeyValueProcessorConfig.getFieldSplitCharacters + + for (String pair : pairs) { + String[] kv = pair.split("="); // KeyValueProcessorConfig.getValueSplitCharacters + String key = kv[0]; + String val = kv[1]; + + if (val.startsWith("[") && val.endsWith("]")) { + String nested = val.substring(1, val.length() - 1); + JsonNode nestedNode = recurse(nested, mapper); + root.set(key, nestedNode); + } else if (val.startsWith("(") && val.endsWith(")")) { + String nested = val.substring(1, val.length() - 1); + JsonNode nestedNode = recurse(nested, mapper); + root.set(key, nestedNode); + } else if (val.startsWith("<") && val.endsWith(">")) { + String nested = val.substring(1, val.length() - 1); + JsonNode nestedNode = recurse(nested, mapper); + root.set(key, nestedNode); + } else { + root.put(key, val); + } + } + + return root; + } + private String[] trimWhitespace(String key, Object value) { String[] arr = {key.stripTrailing(), value.toString().stripLeading()}; return arr; From d003e461d9c7e625bd99e89e42aa0147b7b9a6d5 Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Mon, 14 Aug 2023 23:14:37 +0000 Subject: [PATCH 04/13] resolve errors Signed-off-by: Kat Shen --- .../plugins/processor/keyvalue/KeyValueProcessor.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index 36a169ec7b..72cdedc1dd 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -16,7 +16,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; import java.util.Collection; import java.util.HashMap; @@ -142,8 +144,6 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces if (keyValueProcessorConfig.getRemoveBrackets() && keyValueProcessorConfig.getRecursive()) { throw new IllegalArgumentException("Cannot remove brackets needed for determining levels of recursion"); } - - initRecursiveMap(recursiveBracketMap); } private String buildRegexFromCharacters(String s) { @@ -200,7 +200,7 @@ public Collection> doExecute(final Collection> recor if (keyValueProcessorConfig.getRecursive()) { ObjectMapper mapper = new ObjectMapper(); - JsonNode result = recurse(groupsRaw); + JsonNode result = recurse(groupsRaw, mapper); } final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); From 3a90c69c31f6423ba7f68fa2f6072a928223dc6f Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Wed, 16 Aug 2023 16:45:18 +0000 Subject: [PATCH 05/13] inner string parse logic done, working on splitter logic Signed-off-by: Kat Shen --- .../processor/keyvalue/KeyValueProcessor.java | 85 +++++++++++++------ 1 file changed, 59 insertions(+), 26 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index 72cdedc1dd..f72ff4c97c 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -31,6 +31,7 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import java.util.regex.Matcher; +import java.util.Stack; @DataPrepperPlugin(name = "key_value", pluginType = Processor.class, pluginConfigurationType = KeyValueProcessorConfig.class) public class KeyValueProcessor extends AbstractProcessor, Record> { @@ -51,6 +52,8 @@ public class KeyValueProcessor extends AbstractProcessor, Record validWhitespaceSet = Set.of(whitespaceLenient, whitespaceStrict); + private final List openBrackets = new ArrayList(); + private final List closedBrackets = new ArrayList(); @DataPrepperPluginConstructor public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProcessorConfig keyValueProcessorConfig) { @@ -197,13 +200,20 @@ public Collection> doExecute(final Collection> recor final Event recordEvent = record.getData(); final String groupsRaw = recordEvent.get(keyValueProcessorConfig.getSource(), String.class); + final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); if (keyValueProcessorConfig.getRecursive()) { ObjectMapper mapper = new ObjectMapper(); JsonNode result = recurse(groupsRaw, mapper); - } + Map recursedMap = treeToMap(mapper, result); + // recordEvent.put(keyValueProcessorConfig.getDestination(), recursedMap); - final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); + for (String name: recursedMap.keySet()) { + String key = name.toString(); + String value = recursedMap.get(name).toString(); + System.out.println(key + " " + value); + } + } for(final String group : groups) { final String[] terms = keyValueDelimiterPattern.split(group, 2); @@ -274,33 +284,56 @@ public Collection> doExecute(final Collection> recor return records; } - private static JsonNode recurse(String rawStringInput, ObjectMapper mapper) { - ObjectNode root = mapper.createObjectNode(); - String[] pairs = rawStringInput.split("&"); // KeyValueProcessorConfig.getFieldSplitCharacters - - for (String pair : pairs) { - String[] kv = pair.split("="); // KeyValueProcessorConfig.getValueSplitCharacters - String key = kv[0]; - String val = kv[1]; - - if (val.startsWith("[") && val.endsWith("]")) { - String nested = val.substring(1, val.length() - 1); - JsonNode nestedNode = recurse(nested, mapper); - root.set(key, nestedNode); - } else if (val.startsWith("(") && val.endsWith(")")) { - String nested = val.substring(1, val.length() - 1); - JsonNode nestedNode = recurse(nested, mapper); - root.set(key, nestedNode); - } else if (val.startsWith("<") && val.endsWith(">")) { - String nested = val.substring(1, val.length() - 1); - JsonNode nestedNode = recurse(nested, mapper); - root.set(key, nestedNode); - } else { - root.put(key, val); + private static JsonNode recurse(String input) { + Stack bracketStack = new Stack(); + initBracketLists(); + int innerStart = -1; + int innerEnd = -1; + + for (int i = 0; i < input.length(); i++) { + if (input[i] == "=" && bracketStack.isEmpty()) { // change to config variable + // do some splitting + // save key name into variable, need to keep track of beginning and end of key + } + + if (input[i] == "&" && bracketStack.isEmpty()) { + // do some splitting + // save individual pairs in some sort of data structure + } + + if openBrackets.contains(input[i]) { + if (bracketStack.isEmpty()) { + innerStart = i; + } + bracketStack.push(input[i]); + } + + if (closedBrackets.contains(input[i]) && !bracketStack.isEmpty()) { + if (bracketStack.peek() == openBrackets[closedBrackets.indexOf(input[i])]) { + innerEnd = i; + bracketStack.pop(); + } + } + + if (innerStart > -1 && innerEnd > -1) { + recurse(input.substring(innerStart + 1, innerEnd - 1)); } } + } + + private void initBracketLists() { + openBrackets.add("["); + openBrackets.add("(", ")"); + openBrackets.add("<", ">"); + + closedBrackets.add("]"); + closedBrackets.add(")"); + closedBrackets.add(">"); + } - return root; + private Map treeToMap(ObjectMapper mapper, JsonNode root) { + Map map = mapper.convertValue(root, Map.class); + return map; } private String[] trimWhitespace(String key, Object value) { From c5bbabaf1a63bf1b9c48b3c4a68f9c32e8e821ed Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Thu, 17 Aug 2023 19:29:03 +0000 Subject: [PATCH 06/13] write recursive implementation and reorganize code for clarity, fixing bugs with recursing Signed-off-by: Kat Shen --- .../processor/keyvalue/KeyValueProcessor.java | 244 +++++++++++------- 1 file changed, 150 insertions(+), 94 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index f72ff4c97c..7f57d0de86 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -19,6 +19,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.core.type.TypeReference; import java.util.Collection; import java.util.HashMap; @@ -26,12 +27,14 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.LinkedHashMap; import java.util.Objects; import java.util.Set; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import java.util.regex.Matcher; import java.util.Stack; +import java.util.ArrayList; @DataPrepperPlugin(name = "key_value", pluginType = Processor.class, pluginConfigurationType = KeyValueProcessorConfig.class) public class KeyValueProcessor extends AbstractProcessor, Record> { @@ -52,8 +55,9 @@ public class KeyValueProcessor extends AbstractProcessor, Record validWhitespaceSet = Set.of(whitespaceLenient, whitespaceStrict); - private final List openBrackets = new ArrayList(); - private final List closedBrackets = new ArrayList(); + private static HashMap bracketMap = new HashMap<>(); + private HashMap nonRecursedMap = new LinkedHashMap<>(); + private HashMap recursedMap = new LinkedHashMap<>(); @DataPrepperPluginConstructor public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProcessorConfig keyValueProcessorConfig) { @@ -200,135 +204,187 @@ public Collection> doExecute(final Collection> recor final Event recordEvent = record.getData(); final String groupsRaw = recordEvent.get(keyValueProcessorConfig.getSource(), String.class); - final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); if (keyValueProcessorConfig.getRecursive()) { ObjectMapper mapper = new ObjectMapper(); - JsonNode result = recurse(groupsRaw, mapper); - Map recursedMap = treeToMap(mapper, result); - // recordEvent.put(keyValueProcessorConfig.getDestination(), recursedMap); - - for (String name: recursedMap.keySet()) { - String key = name.toString(); - String value = recursedMap.get(name).toString(); - System.out.println(key + " " + value); - } + JsonNode recursedTree = recurse(groupsRaw, mapper); + createRecursedMap(recursedTree, mapper); + executeConfigs(recursedMap, parsedMap); + } else { + final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); + createNonRecursedMap(groups); + executeConfigs(nonRecursedMap, parsedMap); } - for(final String group : groups) { - final String[] terms = keyValueDelimiterPattern.split(group, 2); - String key = terms[0]; - Object value; - - if (!includeKeysSet.isEmpty() && !includeKeysSet.contains(key)) { - LOG.debug("Skipping not included key: '{}'", key); + for (Map.Entry pair : defaultValuesMap.entrySet()) { + if (parsedMap.containsKey(pair.getKey())) { + LOG.debug("Skipping already included default key: '{}'", pair.getKey()); continue; } + parsedMap.put(pair.getKey(), pair.getValue()); + } - if (excludeKeysSet.contains(key)) { - LOG.debug("Key is being excluded: '{}'", key); - continue; - } + recordEvent.put(keyValueProcessorConfig.getDestination(), parsedMap); + } - if(keyValueProcessorConfig.getDeleteKeyRegex() != null && !Objects.equals(keyValueProcessorConfig.getDeleteKeyRegex(), "")) { - key = key.replaceAll(keyValueProcessorConfig.getDeleteKeyRegex(), ""); - } - key = keyValueProcessorConfig.getPrefix() + key; + return records; + } - if (terms.length == 2) { - value = terms[1]; - } else { - LOG.debug("Unsuccessful match: '{}'", terms[0]); - value = keyValueProcessorConfig.getNonMatchValue(); - } + private ObjectNode recurse(String input, ObjectMapper mapper) { + Stack bracketStack = new Stack(); + initBracketMap(); + int pairStart = 0; - if(value != null - && value instanceof String - && keyValueProcessorConfig.getDeleteValueRegex() != null - && !Objects.equals(keyValueProcessorConfig.getDeleteValueRegex(), "")) { - value = ((String)value).replaceAll(keyValueProcessorConfig.getDeleteValueRegex(), ""); - } + ArrayList pairs = new ArrayList(); - if (keyValueProcessorConfig.getWhitespace().equals(whitespaceStrict)) { - String[] whitespace_arr = trimWhitespace(key, value); - key = whitespace_arr[0]; - value = whitespace_arr[1]; - } + // create empty root node + ObjectNode root = mapper.createObjectNode(); - if (keyValueProcessorConfig.getTransformKey() != null - && !keyValueProcessorConfig.getTransformKey().isEmpty()) { - key = transformKey(key); - } + for (int i = 0; i < input.length(); i++) { + if (bracketMap.containsKey(input.charAt(i))) { // open bracket + bracketStack.push(input.charAt(i)); + } - if (keyValueProcessorConfig.getRemoveBrackets()) { - final String bracketRegex = "[\\[\\]()<>]"; - if (value != null) { - value = value.toString().replaceAll(bracketRegex,""); - } + if (bracketMap.containsValue(input.charAt(i)) && !bracketStack.isEmpty()) { // closed bracket + if (bracketMap.get(bracketStack.peek()) == input.charAt(i)) { // check if brackets are matched + bracketStack.pop(); } + } - addKeyValueToMap(parsedMap, key, value); + if (bracketStack.isEmpty() && input.charAt(i) == '&') { // config variable + // save pairs in array + String pair = input.substring(pairStart, i - 1); + pairs.add(pair); + pairStart = i + 1; } + } - for (Map.Entry pair : defaultValuesMap.entrySet()) { - if (parsedMap.containsKey(pair.getKey())) { - LOG.debug("Skipping already included default key: '{}'", pair.getKey()); - continue; + // handle last pair case after parsing thru input and there are no more splitters + int pairEnd = input.length(); + pairs.add(input.substring(pairStart, pairEnd - 1)); + + for (final String pair : pairs) { + int keyStart = 0; + int keyEnd = -1; + int valueStart = -1; + int valueEnd = -1; + String keyString = ""; + String valueString; + + bracketStack.clear(); + + for (int i = 0; i < pair.length(); i++) { // search for kv splitter + if (bracketStack.isEmpty() && pair.charAt(i) == '=') { // change to config variable + keyString = pair.substring(keyStart, i - 1); + valueStart = i + 1; + break; } - parsedMap.put(pair.getKey(), pair.getValue()); } - recordEvent.put(keyValueProcessorConfig.getDestination(), parsedMap); + if (keyString.isBlank()) { + // handle nonmatch value + keyString = pair; + LOG.debug("Unsuccessful match: '{}'", keyString); + valueString = keyValueProcessorConfig.getNonMatchValue().toString(); + } else if (bracketMap.containsKey(pair.charAt(valueStart))) { // nested content + bracketStack.push(pair.charAt(valueStart)); + valueStart++; + + for (int i = valueStart + 1; i < pair.length(); i++) { + if (bracketMap.containsValue(pair.charAt(i))) { + if (bracketMap.get(bracketStack.peek()) == pair.charAt(i)) { // brackets match, set up for recursion + valueEnd = i; + bracketStack.pop(); + valueString = pair.substring(valueStart, valueEnd - 1); + JsonNode child = ((ObjectNode) root).put(keyString, recurse(valueString, mapper)); + } + } + } + } else { // no nested content + valueEnd = pair.length(); + valueString = pair.substring(valueStart, valueEnd - 1); + + ObjectNode child = ((ObjectNode)root).put(keyString, valueString); + } } - return records; + return root; } - private static JsonNode recurse(String input) { - Stack bracketStack = new Stack(); - initBracketLists(); - int innerStart = -1; - int innerEnd = -1; + private static void initBracketMap() { + bracketMap.put('[', ']'); + bracketMap.put('(', ')'); + bracketMap.put('<', '>'); + } - for (int i = 0; i < input.length(); i++) { - if (input[i] == "=" && bracketStack.isEmpty()) { // change to config variable - // do some splitting - // save key name into variable, need to keep track of beginning and end of key + private void createRecursedMap(JsonNode node, ObjectMapper mapper) { + recursedMap = mapper.convertValue(node, new TypeReference>() {}); + } + + private void createNonRecursedMap(String[] groups) { + for(final String group : groups) { + final String[] terms = keyValueDelimiterPattern.split(group, 2); + String key = terms[0]; + Object value; + + if (terms.length == 2) { + value = terms[1]; + } else { + LOG.debug("Unsuccessful match: '{}'", terms[0]); + value = keyValueProcessorConfig.getNonMatchValue(); } - if (input[i] == "&" && bracketStack.isEmpty()) { - // do some splitting - // save individual pairs in some sort of data structure + nonRecursedMap.put(key, value); + } + } + + private void executeConfigs(HashMap map, Map parsed) { + for (Map.Entry entry : map.entrySet()) { + String key = entry.getKey(); + Object value = entry.getValue(); + + if (!includeKeysSet.isEmpty() && !includeKeysSet.contains(key)) { + LOG.debug("Skipping not included key: '{}'", key); + continue; } - if openBrackets.contains(input[i]) { - if (bracketStack.isEmpty()) { - innerStart = i; - } - bracketStack.push(input[i]); + if (excludeKeysSet.contains(key)) { + LOG.debug("Key is being excluded: '{}'", key); + continue; } - if (closedBrackets.contains(input[i]) && !bracketStack.isEmpty()) { - if (bracketStack.peek() == openBrackets[closedBrackets.indexOf(input[i])]) { - innerEnd = i; - bracketStack.pop(); - } + if(keyValueProcessorConfig.getDeleteKeyRegex() != null && !Objects.equals(keyValueProcessorConfig.getDeleteKeyRegex(), "")) { + key = key.replaceAll(keyValueProcessorConfig.getDeleteKeyRegex(), ""); } + key = keyValueProcessorConfig.getPrefix() + key; - if (innerStart > -1 && innerEnd > -1) { - recurse(input.substring(innerStart + 1, innerEnd - 1)); + if(value != null + && value instanceof String + && keyValueProcessorConfig.getDeleteValueRegex() != null + && !Objects.equals(keyValueProcessorConfig.getDeleteValueRegex(), "")) { + value = ((String)value).replaceAll(keyValueProcessorConfig.getDeleteValueRegex(), ""); } - } - } - private void initBracketLists() { - openBrackets.add("["); - openBrackets.add("(", ")"); - openBrackets.add("<", ">"); + if (keyValueProcessorConfig.getWhitespace().equals(whitespaceStrict)) { + String[] whitespace_arr = trimWhitespace(key, value); + key = whitespace_arr[0]; + value = whitespace_arr[1]; + } + + if (keyValueProcessorConfig.getTransformKey() != null + && !keyValueProcessorConfig.getTransformKey().isEmpty()) { + key = transformKey(key); + } - closedBrackets.add("]"); - closedBrackets.add(")"); - closedBrackets.add(">"); + if (keyValueProcessorConfig.getRemoveBrackets()) { + final String bracketRegex = "[\\[\\]()<>]"; + if (value != null) { + value = value.toString().replaceAll(bracketRegex,""); + } + } + + addKeyValueToMap(parsed, key, value); + } } private Map treeToMap(ObjectMapper mapper, JsonNode root) { From 2f0ccbac98f569124bf81b6d30a766aa5c61ac98 Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Thu, 17 Aug 2023 21:26:06 +0000 Subject: [PATCH 07/13] basic implementation done and working, cleaning code and testing edge cases Signed-off-by: Kat Shen --- .../processor/keyvalue/KeyValueProcessor.java | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index 7f57d0de86..5ceb51912e 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -55,9 +55,6 @@ public class KeyValueProcessor extends AbstractProcessor, Record validWhitespaceSet = Set.of(whitespaceLenient, whitespaceStrict); - private static HashMap bracketMap = new HashMap<>(); - private HashMap nonRecursedMap = new LinkedHashMap<>(); - private HashMap recursedMap = new LinkedHashMap<>(); @DataPrepperPluginConstructor public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProcessorConfig keyValueProcessorConfig) { @@ -202,18 +199,18 @@ public Collection> doExecute(final Collection> recor for(final Record record : records) { final Map parsedMap = new HashMap<>(); final Event recordEvent = record.getData(); - final String groupsRaw = recordEvent.get(keyValueProcessorConfig.getSource(), String.class); + final Map outputMap; if (keyValueProcessorConfig.getRecursive()) { ObjectMapper mapper = new ObjectMapper(); JsonNode recursedTree = recurse(groupsRaw, mapper); - createRecursedMap(recursedTree, mapper); - executeConfigs(recursedMap, parsedMap); + outputMap = createRecursedMap(recursedTree, mapper); + executeConfigs(outputMap, parsedMap); } else { final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); - createNonRecursedMap(groups); - executeConfigs(nonRecursedMap, parsedMap); + outputMap = createNonRecursedMap(groups); + executeConfigs(outputMap, parsedMap); } for (Map.Entry pair : defaultValuesMap.entrySet()) { @@ -232,12 +229,10 @@ public Collection> doExecute(final Collection> recor private ObjectNode recurse(String input, ObjectMapper mapper) { Stack bracketStack = new Stack(); - initBracketMap(); + Map bracketMap = initBracketMap(); int pairStart = 0; ArrayList pairs = new ArrayList(); - - // create empty root node ObjectNode root = mapper.createObjectNode(); for (int i = 0; i < input.length(); i++) { @@ -253,15 +248,14 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { if (bracketStack.isEmpty() && input.charAt(i) == '&') { // config variable // save pairs in array - String pair = input.substring(pairStart, i - 1); + String pair = input.substring(pairStart, i); pairs.add(pair); pairStart = i + 1; } } // handle last pair case after parsing thru input and there are no more splitters - int pairEnd = input.length(); - pairs.add(input.substring(pairStart, pairEnd - 1)); + pairs.add(input.substring(pairStart)); for (final String pair : pairs) { int keyStart = 0; @@ -275,7 +269,7 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { for (int i = 0; i < pair.length(); i++) { // search for kv splitter if (bracketStack.isEmpty() && pair.charAt(i) == '=') { // change to config variable - keyString = pair.substring(keyStart, i - 1); + keyString = pair.substring(keyStart, i); valueStart = i + 1; break; } @@ -295,14 +289,13 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { if (bracketMap.get(bracketStack.peek()) == pair.charAt(i)) { // brackets match, set up for recursion valueEnd = i; bracketStack.pop(); - valueString = pair.substring(valueStart, valueEnd - 1); + valueString = pair.substring(valueStart, valueEnd); JsonNode child = ((ObjectNode) root).put(keyString, recurse(valueString, mapper)); } } } } else { // no nested content - valueEnd = pair.length(); - valueString = pair.substring(valueStart, valueEnd - 1); + valueString = pair.substring(valueStart); ObjectNode child = ((ObjectNode)root).put(keyString, valueString); } @@ -311,17 +304,23 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { return root; } - private static void initBracketMap() { + private static Map initBracketMap() { + Map bracketMap = new HashMap<>(); + bracketMap.put('[', ']'); bracketMap.put('(', ')'); bracketMap.put('<', '>'); + + return bracketMap; } - private void createRecursedMap(JsonNode node, ObjectMapper mapper) { - recursedMap = mapper.convertValue(node, new TypeReference>() {}); + private Map createRecursedMap(JsonNode node, ObjectMapper mapper) { + return mapper.convertValue(node, new TypeReference>() {}); } - private void createNonRecursedMap(String[] groups) { + private Map createNonRecursedMap(String[] groups) { + Map nonRecursedMap = new LinkedHashMap<>(); + for(final String group : groups) { final String[] terms = keyValueDelimiterPattern.split(group, 2); String key = terms[0]; @@ -336,9 +335,11 @@ private void createNonRecursedMap(String[] groups) { nonRecursedMap.put(key, value); } + + return nonRecursedMap; } - private void executeConfigs(HashMap map, Map parsed) { + private void executeConfigs(Map map, Map parsed) { for (Map.Entry entry : map.entrySet()) { String key = entry.getKey(); Object value = entry.getValue(); From df0a26daf630b0b5e4900897b8d95d558b824a18 Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Fri, 18 Aug 2023 22:22:49 +0000 Subject: [PATCH 08/13] resolve duplicate value test failures and add basic recursive test Signed-off-by: Kat Shen --- .../key-value-processor/README.md | 5 +- .../processor/keyvalue/KeyValueProcessor.java | 114 +++++++++++++----- .../keyvalue/KeyValueProcessorTests.java | 23 +++- 3 files changed, 109 insertions(+), 33 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/README.md b/data-prepper-plugins/key-value-processor/README.md index 0a780fe1f5..4ec57b68ec 100644 --- a/data-prepper-plugins/key-value-processor/README.md +++ b/data-prepper-plugins/key-value-processor/README.md @@ -49,9 +49,9 @@ When run, the processor will parse the message into the following output: * Default: `{}` * Example: `default_values` is `{"defaultkey": "defaultvalue"}`. `key1=value1` will parse into `{"key1": "value1", "defaultkey": "defaultvalue"}` * If the default key already exists in the message, the value is not changed. - * Example: `default_values` is `{"value1": "abc"}`. `key1=value1` will parse into `{"key1": "value1"}` + * Example: `default_values` is `{"key1": "abc"}`. `key1=value1` will parse into `{"key1": "value1"}` * It should be noted that the include_keys filter will be applied to the message first, and then default keys. - * Example: `include_keys` is `["key1"]`, and `default_keys` is `{"key2": "value2"}`. `key1=value1&key2=abc` will parse into `{"key1": "value1", "key2": "value2"}` + * Example: `include_keys` is `["key1"]`, and `default_values` is `{"key2": "value2"}`. `key1=value1&key2=abc` will parse into `{"key1": "value1", "key2": "value2"}` * `key_value_delimiter_regex` - A regex specifying the delimiter between a key and a value. Special regex characters such as `[` and `]` must be escaped using `\\`. * There is no default. * Note: This cannot be defined at the same time as `value_split_characters` @@ -95,6 +95,7 @@ When run, the processor will parse the message into the following output: * Example: `recursive` is true. `{"item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value"}` will parse into `"item1": {"item1-subitem1": "item1-subitem1-value", "item1-subitem2": {"item1-subitem2-subitem2A": "item1-subitem2-subitem2A-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value"}}` * Example: `recursive` is false. `{"item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value"}` will parse into `"item1-subitem2": "(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value", "item2": "item2-value","item1": "[item1-subitem1=item1-subitem1-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value)]"` * While `recursive` is `true`, `remove_brackets` cannot also be `true`. + * While `recursive` is `true`, `skip_duplicate_values` will always be `true`. ## Developer Guide This plugin is compatible with Java 14. See diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index 5ceb51912e..781ecbda71 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -55,6 +55,7 @@ public class KeyValueProcessor extends AbstractProcessor, Record validWhitespaceSet = Set.of(whitespaceLenient, whitespaceStrict); + private final Set bracketSet = Set.of('[', ']', '(', ')', '<', '>'); @DataPrepperPluginConstructor public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProcessorConfig keyValueProcessorConfig) { @@ -70,6 +71,14 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces throw new PatternSyntaxException("field_delimiter_regex is not a valid regex string", keyValueProcessorConfig.getFieldDelimiterRegex(), -1); } + if (keyValueProcessorConfig.getRecursive()) { + for (char c : keyValueProcessorConfig.getFieldDelimiterRegex().toCharArray()) { + if (bracketSet.contains(String.valueOf(c))) { + throw new IllegalArgumentException("The set field delimiter regex cannot contain brackets while you are trying to recurse."); + } + } + } + fieldDelimiterPattern = Pattern.compile(keyValueProcessorConfig.getFieldDelimiterRegex()); } else { String regex; @@ -79,6 +88,14 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces regex = buildRegexFromCharacters(keyValueProcessorConfig.getFieldSplitCharacters()); } + if (keyValueProcessorConfig.getRecursive()) { + for (char c : regex.toCharArray()) { + if (bracketSet.contains(String.valueOf(c))) { + throw new IllegalArgumentException("The set field split characters cannot contain brackets while you are trying to recurse."); + } + } + } + fieldDelimiterPattern = Pattern.compile(regex); } @@ -91,6 +108,14 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces throw new PatternSyntaxException("key_value_delimiter_regex is not a valid regex string", keyValueProcessorConfig.getKeyValueDelimiterRegex(), -1); } + if (keyValueProcessorConfig.getRecursive()) { + for (char c : keyValueProcessorConfig.getKeyValueDelimiterRegex().toCharArray()) { + if (bracketSet.contains(String.valueOf(c))) { + throw new IllegalArgumentException("The set key value delimiter regex cannot contain brackets while you are trying to recurse."); + } + } + } + keyValueDelimiterPattern = Pattern.compile(keyValueProcessorConfig.getKeyValueDelimiterRegex()); } else { String regex; @@ -100,6 +125,14 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces regex = buildRegexFromCharacters(keyValueProcessorConfig.getValueSplitCharacters()); } + if (keyValueProcessorConfig.getRecursive()) { + for (char c : regex.toCharArray()) { + if (bracketSet.contains(String.valueOf(c))) { + throw new IllegalArgumentException("The set value split characters cannot contain brackets while you are trying to recurse."); + } + } + } + keyValueDelimiterPattern = Pattern.compile(regex); } @@ -197,22 +230,26 @@ private void validateKeySets(final Set includeSet, final Set exc @Override public Collection> doExecute(final Collection> records) { for(final Record record : records) { + final Map outputMap = new HashMap<>(); final Map parsedMap = new HashMap<>(); final Event recordEvent = record.getData(); final String groupsRaw = recordEvent.get(keyValueProcessorConfig.getSource(), String.class); - final Map outputMap; + final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); if (keyValueProcessorConfig.getRecursive()) { ObjectMapper mapper = new ObjectMapper(); - JsonNode recursedTree = recurse(groupsRaw, mapper); - outputMap = createRecursedMap(recursedTree, mapper); - executeConfigs(outputMap, parsedMap); + try { + JsonNode recursedTree = recurse(groupsRaw, mapper); + outputMap.putAll(createRecursedMap(recursedTree, mapper)); + } catch (Exception e) { + LOG.error("Recursive parsing ran into an unexpected error, treating message as non-recursive"); + } } else { - final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); - outputMap = createNonRecursedMap(groups); - executeConfigs(outputMap, parsedMap); + outputMap.putAll(createNonRecursedMap(groups)); } + executeConfigs(outputMap, parsedMap); + for (Map.Entry pair : defaultValuesMap.entrySet()) { if (parsedMap.containsKey(pair.getKey())) { LOG.debug("Skipping already included default key: '{}'", pair.getKey()); @@ -236,25 +273,23 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { ObjectNode root = mapper.createObjectNode(); for (int i = 0; i < input.length(); i++) { - if (bracketMap.containsKey(input.charAt(i))) { // open bracket + if (bracketMap.containsKey(input.charAt(i))) { bracketStack.push(input.charAt(i)); } - if (bracketMap.containsValue(input.charAt(i)) && !bracketStack.isEmpty()) { // closed bracket - if (bracketMap.get(bracketStack.peek()) == input.charAt(i)) { // check if brackets are matched + if (bracketMap.containsValue(input.charAt(i)) && !bracketStack.isEmpty()) { + if (bracketMap.get(bracketStack.peek()) == input.charAt(i)) { bracketStack.pop(); } } - if (bracketStack.isEmpty() && input.charAt(i) == '&') { // config variable - // save pairs in array + if (bracketStack.isEmpty() && input.charAt(i) == fieldDelimiterPattern.toString().charAt(0)) { String pair = input.substring(pairStart, i); pairs.add(pair); - pairStart = i + 1; + pairStart = i + fieldDelimiterPattern.toString().length(); } } - // handle last pair case after parsing thru input and there are no more splitters pairs.add(input.substring(pairStart)); for (final String pair : pairs) { @@ -267,26 +302,25 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { bracketStack.clear(); - for (int i = 0; i < pair.length(); i++) { // search for kv splitter - if (bracketStack.isEmpty() && pair.charAt(i) == '=') { // change to config variable + for (int i = 0; i < pair.length(); i++) { + if (bracketStack.isEmpty() && pair.charAt(i) == keyValueDelimiterPattern.toString().charAt(0)) { keyString = pair.substring(keyStart, i); - valueStart = i + 1; + valueStart = i + keyValueDelimiterPattern.toString().length(); break; } } if (keyString.isBlank()) { - // handle nonmatch value keyString = pair; LOG.debug("Unsuccessful match: '{}'", keyString); valueString = keyValueProcessorConfig.getNonMatchValue().toString(); - } else if (bracketMap.containsKey(pair.charAt(valueStart))) { // nested content + } else if (bracketMap.containsKey(pair.charAt(valueStart))) { bracketStack.push(pair.charAt(valueStart)); valueStart++; for (int i = valueStart + 1; i < pair.length(); i++) { if (bracketMap.containsValue(pair.charAt(i))) { - if (bracketMap.get(bracketStack.peek()) == pair.charAt(i)) { // brackets match, set up for recursion + if (bracketMap.get(bracketStack.peek()) == pair.charAt(i)) { valueEnd = i; bracketStack.pop(); valueString = pair.substring(valueStart, valueEnd); @@ -294,9 +328,8 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { } } } - } else { // no nested content + } else { valueString = pair.substring(valueStart); - ObjectNode child = ((ObjectNode)root).put(keyString, valueString); } } @@ -320,6 +353,7 @@ private Map createRecursedMap(JsonNode node, ObjectMapper mapper private Map createNonRecursedMap(String[] groups) { Map nonRecursedMap = new LinkedHashMap<>(); + List valueList; for(final String group : groups) { final String[] terms = keyValueDelimiterPattern.split(group, 2); @@ -333,7 +367,27 @@ private Map createNonRecursedMap(String[] groups) { value = keyValueProcessorConfig.getNonMatchValue(); } - nonRecursedMap.put(key, value); + if (nonRecursedMap.containsKey(key)) { + Object existingValue = nonRecursedMap.get(key); + + if (existingValue instanceof List) { + valueList = (List) existingValue; + } else { + valueList = new ArrayList(); + valueList.add(existingValue); + nonRecursedMap.put(key, valueList); + } + + if (keyValueProcessorConfig.getSkipDuplicateValues()) { + if (!valueList.contains(value)) { + valueList.add(value); + } + } else { + valueList.add(value); + } + } else { + nonRecursedMap.put(key, value); + } } return nonRecursedMap; @@ -388,11 +442,6 @@ private void executeConfigs(Map map, Map parsed) } } - private Map treeToMap(ObjectMapper mapper, JsonNode root) { - Map map = mapper.convertValue(root, Map.class); - return map; - } - private String[] trimWhitespace(String key, Object value) { String[] arr = {key.stripTrailing(), value.toString().stripLeading()}; return arr; @@ -409,7 +458,14 @@ private String transformKey(String key) { return key; } - private void addKeyValueToMap(final Map parsedMap, final String key, final Object value) { + private void addKeyValueToMap(final Map parsedMap, final String key, Object value) { + if (value instanceof List) { + List valueAsList = (List) value; + if (valueAsList.size() == 1) { + value = valueAsList.get(0); + } + } + if(!parsedMap.containsKey(key)) { parsedMap.put(key, value); return; diff --git a/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java b/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java index db50b7a403..ea9d24d602 100644 --- a/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java +++ b/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java @@ -71,6 +71,7 @@ void setup() { lenient().when(mockConfig.getWhitespace()).thenReturn(defaultConfig.getWhitespace()); lenient().when(mockConfig.getSkipDuplicateValues()).thenReturn(defaultConfig.getSkipDuplicateValues()); lenient().when(mockConfig.getRemoveBrackets()).thenReturn(defaultConfig.getRemoveBrackets()); + lenient().when(mockConfig.getRecursive()).thenReturn(defaultConfig.getRecursive()); keyValueProcessor = new KeyValueProcessor(pluginMetrics, mockConfig); } @@ -564,8 +565,6 @@ void testStrictWhitespaceKvProcessor() { @Test void testFalseSkipDuplicateValuesKvProcessor() { - when(mockConfig.getSkipDuplicateValues()).thenReturn(false); - final Record record = getMessage("key1=value1&key1=value1"); final List> editedRecords = (List>) keyValueProcessor.doExecute(Collections.singletonList(record)); final LinkedHashMap parsed_message = getLinkedHashMap(editedRecords); @@ -633,6 +632,26 @@ void testTrueRemoveMultipleBracketsKvProcessor() { assertThatKeyEquals(parsed_message, "key2", "value1value2"); } + @Test + void testBasicRecursiveKvProcessor() { + when(mockConfig.getRecursive()).thenReturn(true); + + final Record record = getMessage("item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=item1-subitem2-value]&item2=item2-value"); + final List> editedRecords = (List>) keyValueProcessor.doExecute(Collections.singletonList(record)); + final LinkedHashMap parsed_message = getLinkedHashMap(editedRecords); + + final Map expectedValueMap = new HashMap<>(); + expectedValueMap.put("item1-subitem1", "item1-subitem1-value"); + expectedValueMap.put("item1-subitem2", "item1-subitem2-value"); + + assertThat(parsed_message.size(), equalTo(2)); + assertThatKeyEquals(parsed_message, "item1", expectedValueMap); + assertThatKeyEquals(parsed_message, "item2", "item2-value"); + } + + // write test for multiple levels of recursion + // write tests for interactions with other features (only applied to top level keys) + @Test void testShutdownIsReady() { assertThat(keyValueProcessor.isReadyForShutdown(), is(true)); From 941a7e815c42f7d2f2f37ea49b2aea8c8e626a91 Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Fri, 18 Aug 2023 23:26:02 +0000 Subject: [PATCH 09/13] write tests and specify configs in regards to recursive Signed-off-by: Kat Shen --- .../key-value-processor/README.md | 2 + .../processor/keyvalue/KeyValueProcessor.java | 11 ++- .../keyvalue/KeyValueProcessorTests.java | 95 ++++++++++++++++++- 3 files changed, 102 insertions(+), 6 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/README.md b/data-prepper-plugins/key-value-processor/README.md index 4ec57b68ec..32c1f0a7e3 100644 --- a/data-prepper-plugins/key-value-processor/README.md +++ b/data-prepper-plugins/key-value-processor/README.md @@ -94,8 +94,10 @@ When run, the processor will parse the message into the following output: * The levels of recursive parsing must be defined by different brackets for each level: `[]`, `()`, and `<>` in this order. * Example: `recursive` is true. `{"item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value"}` will parse into `"item1": {"item1-subitem1": "item1-subitem1-value", "item1-subitem2": {"item1-subitem2-subitem2A": "item1-subitem2-subitem2A-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value"}}` * Example: `recursive` is false. `{"item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value&item1-subitem2-subitem2B=item1-subitem2-subitem2B-value)]&item2=item2-value"}` will parse into `"item1-subitem2": "(item1-subitem2-subitem2A=item1-subitem2-subitem2A-value", "item2": "item2-value","item1": "[item1-subitem1=item1-subitem1-value", "item1-subitem2-subitem2B": "item1-subitem2-subitem2B-value)]"` + * Any other configurations specified will only be applied on the OUTER keys. * While `recursive` is `true`, `remove_brackets` cannot also be `true`. * While `recursive` is `true`, `skip_duplicate_values` will always be `true`. + * While `recursive` is `true`, `whitespace` will always be `"lenient"`. ## Developer Guide This plugin is compatible with Java 14. See diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index 781ecbda71..27c8be224d 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -304,8 +304,11 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { for (int i = 0; i < pair.length(); i++) { if (bracketStack.isEmpty() && pair.charAt(i) == keyValueDelimiterPattern.toString().charAt(0)) { - keyString = pair.substring(keyStart, i); + keyString = pair.substring(keyStart, i).stripTrailing(); valueStart = i + keyValueDelimiterPattern.toString().length(); + while(pair.charAt(valueStart) == ' ') { + valueStart++; + } break; } } @@ -313,7 +316,7 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { if (keyString.isBlank()) { keyString = pair; LOG.debug("Unsuccessful match: '{}'", keyString); - valueString = keyValueProcessorConfig.getNonMatchValue().toString(); + valueString = keyValueProcessorConfig.getNonMatchValue().toString().stripLeading(); } else if (bracketMap.containsKey(pair.charAt(valueStart))) { bracketStack.push(pair.charAt(valueStart)); valueStart++; @@ -323,13 +326,13 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { if (bracketMap.get(bracketStack.peek()) == pair.charAt(i)) { valueEnd = i; bracketStack.pop(); - valueString = pair.substring(valueStart, valueEnd); + valueString = pair.substring(valueStart, valueEnd).stripLeading(); JsonNode child = ((ObjectNode) root).put(keyString, recurse(valueString, mapper)); } } } } else { - valueString = pair.substring(valueStart); + valueString = pair.substring(valueStart).stripLeading(); ObjectNode child = ((ObjectNode)root).put(keyString, valueString); } } diff --git a/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java b/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java index ea9d24d602..89bddd2589 100644 --- a/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java +++ b/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java @@ -649,8 +649,99 @@ void testBasicRecursiveKvProcessor() { assertThatKeyEquals(parsed_message, "item2", "item2-value"); } - // write test for multiple levels of recursion - // write tests for interactions with other features (only applied to top level keys) + @Test + void testMultiRecursiveKvProcessor() { + when(mockConfig.getRecursive()).thenReturn(true); + + final Record record = getMessage("item1=[item1-subitem1=(inner1=abc&inner2=xyz)&item1-subitem2=item1-subitem2-value]&item2=item2-value"); + final List> editedRecords = (List>) keyValueProcessor.doExecute(Collections.singletonList(record)); + final LinkedHashMap parsed_message = getLinkedHashMap(editedRecords); + + final Map expectedValueMap = new HashMap<>(); + final Map nestedInnerMap = new HashMap<>(); + + nestedInnerMap.put("inner1", "abc"); + nestedInnerMap.put("inner2", "xyz"); + expectedValueMap.put("item1-subitem1", nestedInnerMap); + expectedValueMap.put("item1-subitem2", "item1-subitem2-value"); + + assertThat(parsed_message.size(), equalTo(2)); + assertThatKeyEquals(parsed_message, "item1", expectedValueMap); + assertThatKeyEquals(parsed_message, "item2", "item2-value"); + } + + @Test + void testTransformKeyRecursiveKvProcessor() { + when(mockConfig.getRecursive()).thenReturn(true); + when(mockConfig.getTransformKey()).thenReturn("capitalize"); + + final Record record = getMessage("item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=item1-subitem2-value]&item2=item2-value"); + final List> editedRecords = (List>) keyValueProcessor.doExecute(Collections.singletonList(record)); + final LinkedHashMap parsed_message = getLinkedHashMap(editedRecords); + + final Map expectedValueMap = new HashMap<>(); + expectedValueMap.put("item1-subitem1", "item1-subitem1-value"); + expectedValueMap.put("item1-subitem2", "item1-subitem2-value"); + + assertThat(parsed_message.size(), equalTo(2)); + assertThatKeyEquals(parsed_message, "ITEM1", expectedValueMap); + assertThatKeyEquals(parsed_message, "ITEM2", "item2-value"); + } + + @Test + void testIncludeInnerKeyRecursiveKvProcessor() { + final List includeKeys = List.of("item1-subitem1"); + when(mockConfig.getRecursive()).thenReturn(true); + when(mockConfig.getIncludeKeys()).thenReturn(includeKeys); + keyValueProcessor = new KeyValueProcessor(pluginMetrics, mockConfig); + + final Record record = getMessage("item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=item1-subitem2-value]&item2=item2-value"); + final List> editedRecords = (List>) keyValueProcessor.doExecute(Collections.singletonList(record)); + final LinkedHashMap parsed_message = getLinkedHashMap(editedRecords); + + assertThat(parsed_message.size(), equalTo(0)); + } + + @Test + void testExcludeInnerKeyRecursiveKvProcessor() { + final List excludeKeys = List.of("item1-subitem1"); + when(mockConfig.getRecursive()).thenReturn(true); + when(mockConfig.getExcludeKeys()).thenReturn(excludeKeys); + keyValueProcessor = new KeyValueProcessor(pluginMetrics, mockConfig); + + final Record record = getMessage("item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=item1-subitem2-value]&item2=item2-value"); + final List> editedRecords = (List>) keyValueProcessor.doExecute(Collections.singletonList(record)); + final LinkedHashMap parsed_message = getLinkedHashMap(editedRecords); + + final Map expectedValueMap = new HashMap<>(); + expectedValueMap.put("item1-subitem1", "item1-subitem1-value"); + expectedValueMap.put("item1-subitem2", "item1-subitem2-value"); + + assertThat(parsed_message.size(), equalTo(2)); + assertThatKeyEquals(parsed_message, "item1", expectedValueMap); + assertThatKeyEquals(parsed_message, "item2", "item2-value"); + } + + @Test + void testDefaultInnerKeyRecursiveKvProcessor() { + final Map defaultMap = Map.of("item1-subitem1", "default"); + when(mockConfig.getRecursive()).thenReturn(true); + when(mockConfig.getDefaultValues()).thenReturn(defaultMap); + keyValueProcessor = new KeyValueProcessor(pluginMetrics, mockConfig); + + final Record record = getMessage("item1=[item1-subitem1=item1-subitem1-value&item1-subitem2=item1-subitem2-value]&item2=item2-value"); + final List> editedRecords = (List>) keyValueProcessor.doExecute(Collections.singletonList(record)); + final LinkedHashMap parsed_message = getLinkedHashMap(editedRecords); + + final Map expectedValueMap = new HashMap<>(); + expectedValueMap.put("item1-subitem1", "item1-subitem1-value"); + expectedValueMap.put("item1-subitem2", "item1-subitem2-value"); + + assertThat(parsed_message.size(), equalTo(3)); + assertThatKeyEquals(parsed_message, "item1", expectedValueMap); + assertThatKeyEquals(parsed_message, "item2", "item2-value"); + assertThatKeyEquals(parsed_message, "item1-subitem1", "default"); + } @Test void testShutdownIsReady() { From 7d4aec7a21733efe938494bfb382611b8ee896ce Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Thu, 24 Aug 2023 19:16:43 +0000 Subject: [PATCH 10/13] switch transform_key config functionality, specify that splitters have to have length = 1, switch bracket check logic to pattern matching Signed-off-by: Kat Shen --- .../key-value-processor/README.md | 6 +- .../processor/keyvalue/KeyValueProcessor.java | 114 ++++++++++-------- .../keyvalue/KeyValueProcessorTests.java | 8 +- 3 files changed, 68 insertions(+), 60 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/README.md b/data-prepper-plugins/key-value-processor/README.md index 32c1f0a7e3..02c01a7c4c 100644 --- a/data-prepper-plugins/key-value-processor/README.md +++ b/data-prepper-plugins/key-value-processor/README.md @@ -74,8 +74,8 @@ When run, the processor will parse the message into the following output: * `transform_key` - Change keys to lowercase, uppercase, or all capitals. * Default is an empty string (no transformation) * Example: `transform_key` is `lowercase`. `{"Key1=value1"}` will parse into `{"key1": "value1"}` - * Example: `transform_key` is `uppercase`. `{"key1=value1"}` will parse into `{"Key1": "value1"}` - * Example: `transform_key` is `capitalize`. `{"key1=value1"}` will parse into `{"KEY1": "value1"}` + * Example: `transform_key` is `capitalize`. `{"key1=value1"}` will parse into `{"Key1": "value1"}` + * Example: `transform_key` is `uppercase`. `{"key1=value1"}` will parse into `{"KEY1": "value1"}` * `whitespace` - Specify whether to be lenient or strict with the acceptance of unnecessary whitespace surrounding the configured value-split sequence. * Default: `lenient` * Example: `whitespace` is `"lenient"`. `{"key1 = value1"}` will parse into `{"key1 ": " value1"}` @@ -97,7 +97,7 @@ When run, the processor will parse the message into the following output: * Any other configurations specified will only be applied on the OUTER keys. * While `recursive` is `true`, `remove_brackets` cannot also be `true`. * While `recursive` is `true`, `skip_duplicate_values` will always be `true`. - * While `recursive` is `true`, `whitespace` will always be `"lenient"`. + * While `recursive` is `true`, `whitespace` will always be `"strict"`. ## Developer Guide This plugin is compatible with Java 14. See diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index 27c8be224d..d78a6f122b 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -55,6 +55,7 @@ public class KeyValueProcessor extends AbstractProcessor, Record validWhitespaceSet = Set.of(whitespaceLenient, whitespaceStrict); + final String delimiterBracketCheck = "[\\[\\]()<>]"; private final Set bracketSet = Set.of('[', ']', '(', ')', '<', '>'); @DataPrepperPluginConstructor @@ -72,10 +73,11 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces } if (keyValueProcessorConfig.getRecursive()) { - for (char c : keyValueProcessorConfig.getFieldDelimiterRegex().toCharArray()) { - if (bracketSet.contains(String.valueOf(c))) { - throw new IllegalArgumentException("The set field delimiter regex cannot contain brackets while you are trying to recurse."); - } + if (Pattern.compile(keyValueProcessorConfig.getFieldDelimiterRegex()).matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("The set field delimiter regex cannot contain brackets while you are trying to recurse."); + } + if (keyValueProcessorConfig.getFieldDelimiterRegex().length() != 1) { + throw new IllegalArgumentException("The set field delimiter is limited to one character only."); } } @@ -85,14 +87,17 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces if (keyValueProcessorConfig.getFieldSplitCharacters().isEmpty()) { regex = KeyValueProcessorConfig.DEFAULT_FIELD_SPLIT_CHARACTERS; } else { + if (keyValueProcessorConfig.getRecursive()) { + if (keyValueProcessorConfig.getFieldSplitCharacters().length() != 1) { + throw new IllegalArgumentException("The set field split characters is limited to one character only."); + } + } regex = buildRegexFromCharacters(keyValueProcessorConfig.getFieldSplitCharacters()); } if (keyValueProcessorConfig.getRecursive()) { - for (char c : regex.toCharArray()) { - if (bracketSet.contains(String.valueOf(c))) { - throw new IllegalArgumentException("The set field split characters cannot contain brackets while you are trying to recurse."); - } + if (Pattern.compile(regex).matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("The set field split characters cannot contain brackets while you are trying to recurse."); } } @@ -109,27 +114,32 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces } if (keyValueProcessorConfig.getRecursive()) { - for (char c : keyValueProcessorConfig.getKeyValueDelimiterRegex().toCharArray()) { - if (bracketSet.contains(String.valueOf(c))) { - throw new IllegalArgumentException("The set key value delimiter regex cannot contain brackets while you are trying to recurse."); - } + if (Pattern.compile(keyValueProcessorConfig.getKeyValueDelimiterRegex()).matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("The set key value delimiter regex cannot contain brackets while you are trying to recurse."); + } + if (keyValueProcessorConfig.getKeyValueDelimiterRegex().length() != 1) { + throw new IllegalArgumentException("The set key value delimiter regex is limited to one character only."); } } keyValueDelimiterPattern = Pattern.compile(keyValueProcessorConfig.getKeyValueDelimiterRegex()); } else { String regex; - if(keyValueProcessorConfig.getValueSplitCharacters().isEmpty()) { + if (keyValueProcessorConfig.getValueSplitCharacters().isEmpty()) { regex = KeyValueProcessorConfig.DEFAULT_VALUE_SPLIT_CHARACTERS; } else { + if (keyValueProcessorConfig.getRecursive()) { + if (keyValueProcessorConfig.getValueSplitCharacters().length() != 1) { + throw new IllegalArgumentException("The set value split characters is limited to one character only."); + } + } + regex = buildRegexFromCharacters(keyValueProcessorConfig.getValueSplitCharacters()); } if (keyValueProcessorConfig.getRecursive()) { - for (char c : regex.toCharArray()) { - if (bracketSet.contains(String.valueOf(c))) { - throw new IllegalArgumentException("The set value split characters cannot contain brackets while you are trying to recurse."); - } + if (Pattern.compile(regex).matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("The set value split characters cannot contain brackets while you are trying to recurse."); } } @@ -231,7 +241,6 @@ private void validateKeySets(final Set includeSet, final Set exc public Collection> doExecute(final Collection> records) { for(final Record record : records) { final Map outputMap = new HashMap<>(); - final Map parsedMap = new HashMap<>(); final Event recordEvent = record.getData(); final String groupsRaw = recordEvent.get(keyValueProcessorConfig.getSource(), String.class); final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); @@ -248,17 +257,9 @@ public Collection> doExecute(final Collection> recor outputMap.putAll(createNonRecursedMap(groups)); } - executeConfigs(outputMap, parsedMap); - - for (Map.Entry pair : defaultValuesMap.entrySet()) { - if (parsedMap.containsKey(pair.getKey())) { - LOG.debug("Skipping already included default key: '{}'", pair.getKey()); - continue; - } - parsedMap.put(pair.getKey(), pair.getValue()); - } + final Map processedMap = executeConfigs(outputMap); - recordEvent.put(keyValueProcessorConfig.getDestination(), parsedMap); + recordEvent.put(keyValueProcessorConfig.getDestination(), processedMap); } return records; @@ -286,7 +287,7 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { if (bracketStack.isEmpty() && input.charAt(i) == fieldDelimiterPattern.toString().charAt(0)) { String pair = input.substring(pairStart, i); pairs.add(pair); - pairStart = i + fieldDelimiterPattern.toString().length(); + pairStart = i + 1; } } @@ -305,7 +306,7 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { for (int i = 0; i < pair.length(); i++) { if (bracketStack.isEmpty() && pair.charAt(i) == keyValueDelimiterPattern.toString().charAt(0)) { keyString = pair.substring(keyStart, i).stripTrailing(); - valueStart = i + keyValueDelimiterPattern.toString().length(); + valueStart = i + 1; while(pair.charAt(valueStart) == ' ') { valueStart++; } @@ -318,19 +319,12 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { LOG.debug("Unsuccessful match: '{}'", keyString); valueString = keyValueProcessorConfig.getNonMatchValue().toString().stripLeading(); } else if (bracketMap.containsKey(pair.charAt(valueStart))) { - bracketStack.push(pair.charAt(valueStart)); - valueStart++; - - for (int i = valueStart + 1; i < pair.length(); i++) { - if (bracketMap.containsValue(pair.charAt(i))) { - if (bracketMap.get(bracketStack.peek()) == pair.charAt(i)) { - valueEnd = i; - bracketStack.pop(); - valueString = pair.substring(valueStart, valueEnd).stripLeading(); - JsonNode child = ((ObjectNode) root).put(keyString, recurse(valueString, mapper)); - } - } - } + if (pair.charAt(pair.length() - 1) == bracketMap.get(pair.charAt(valueStart))) { + valueStart++; + valueEnd = pair.length() - 1; + valueString = pair.substring(valueStart, valueEnd).stripLeading(); + JsonNode child = ((ObjectNode) root).put(keyString, recurse(valueString, mapper)); + } } else { valueString = pair.substring(valueStart).stripLeading(); ObjectNode child = ((ObjectNode)root).put(keyString, valueString); @@ -396,7 +390,9 @@ private Map createNonRecursedMap(String[] groups) { return nonRecursedMap; } - private void executeConfigs(Map map, Map parsed) { + private Map executeConfigs(Map map) { + Map processed = new HashMap<>(); + for (Map.Entry entry : map.entrySet()) { String key = entry.getKey(); Object value = entry.getValue(); @@ -441,8 +437,18 @@ private void executeConfigs(Map map, Map parsed) } } - addKeyValueToMap(parsed, key, value); + addKeyValueToMap(processed, key, value); + } + + for (Map.Entry pair : defaultValuesMap.entrySet()) { + if (processed.containsKey(pair.getKey())) { + LOG.debug("Skipping already included default key: '{}'", pair.getKey()); + continue; + } + processed.put(pair.getKey(), pair.getValue()); } + + return processed; } private String[] trimWhitespace(String key, Object value) { @@ -453,45 +459,47 @@ private String[] trimWhitespace(String key, Object value) { private String transformKey(String key) { if (keyValueProcessorConfig.getTransformKey().equals(lowercaseKey)) { key = key.toLowerCase(); - } else if (keyValueProcessorConfig.getTransformKey().equals(uppercaseKey)) { - key = key.substring(0, 1).toUpperCase() + key.substring(1); } else if (keyValueProcessorConfig.getTransformKey().equals(capitalizeKey)) { + key = key.substring(0, 1).toUpperCase() + key.substring(1); + } else if (keyValueProcessorConfig.getTransformKey().equals(uppercaseKey)) { key = key.toUpperCase(); } return key; } private void addKeyValueToMap(final Map parsedMap, final String key, Object value) { + Object processedValue = value; + if (value instanceof List) { List valueAsList = (List) value; if (valueAsList.size() == 1) { - value = valueAsList.get(0); + processedValue = valueAsList.get(0); } } if(!parsedMap.containsKey(key)) { - parsedMap.put(key, value); + parsedMap.put(key, processedValue); return; } if (parsedMap.get(key) instanceof List) { if (keyValueProcessorConfig.getSkipDuplicateValues()) { - if (((List) parsedMap.get(key)).contains(value)) { + if (((List) parsedMap.get(key)).contains(processedValue)) { return; } } - ((List) parsedMap.get(key)).add(value); + ((List) parsedMap.get(key)).add(processedValue); } else { if (keyValueProcessorConfig.getSkipDuplicateValues()) { - if (parsedMap.containsValue(value)) { + if (parsedMap.containsValue(processedValue)) { return; } } final LinkedList combinedList = new LinkedList<>(); combinedList.add(parsedMap.get(key)); - combinedList.add(value); + combinedList.add(processedValue); parsedMap.replace(key, combinedList); } diff --git a/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java b/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java index 89bddd2589..38da5beebf 100644 --- a/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java +++ b/data-prepper-plugins/key-value-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessorTests.java @@ -536,7 +536,7 @@ void testUppercaseTransformKvProcessor() { final LinkedHashMap parsed_message = getLinkedHashMap(editedRecords); assertThat(parsed_message.size(), equalTo(1)); - assertThatKeyEquals(parsed_message, "Key1", "value1"); + assertThatKeyEquals(parsed_message, "KEY1", "value1"); } @Test @@ -548,7 +548,7 @@ void testCapitalizeTransformKvProcessor() { final LinkedHashMap parsedMessage = getLinkedHashMap(editedRecords); assertThat(parsedMessage.size(), equalTo(1)); - assertThatKeyEquals(parsedMessage, "KEY1", "value1"); + assertThatKeyEquals(parsedMessage, "Key1", "value1"); } @Test @@ -684,8 +684,8 @@ void testTransformKeyRecursiveKvProcessor() { expectedValueMap.put("item1-subitem2", "item1-subitem2-value"); assertThat(parsed_message.size(), equalTo(2)); - assertThatKeyEquals(parsed_message, "ITEM1", expectedValueMap); - assertThatKeyEquals(parsed_message, "ITEM2", "item2-value"); + assertThatKeyEquals(parsed_message, "Item1", expectedValueMap); + assertThatKeyEquals(parsed_message, "Item2", "item2-value"); } @Test From 4d8f218ca41e0735e8d74e0221240c50885dce37 Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Thu, 24 Aug 2023 22:57:22 +0000 Subject: [PATCH 11/13] clean code Signed-off-by: Kat Shen --- .../processor/keyvalue/KeyValueProcessor.java | 68 +++++++++---------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index d78a6f122b..14c734af04 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -11,7 +11,6 @@ import org.opensearch.dataprepper.model.event.Event; import org.opensearch.dataprepper.model.processor.AbstractProcessor; import org.opensearch.dataprepper.model.processor.Processor; -import org.opensearch.dataprepper.model.event.JacksonEvent; import org.opensearch.dataprepper.model.record.Record; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -72,36 +71,35 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces throw new PatternSyntaxException("field_delimiter_regex is not a valid regex string", keyValueProcessorConfig.getFieldDelimiterRegex(), -1); } + fieldDelimiterPattern = Pattern.compile(keyValueProcessorConfig.getFieldDelimiterRegex()); + if (keyValueProcessorConfig.getRecursive()) { - if (Pattern.compile(keyValueProcessorConfig.getFieldDelimiterRegex()).matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("The set field delimiter regex cannot contain brackets while you are trying to recurse."); - } - if (keyValueProcessorConfig.getFieldDelimiterRegex().length() != 1) { - throw new IllegalArgumentException("The set field delimiter is limited to one character only."); + if (fieldDelimiterPattern.matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("While recursive is true, + the set field delimiter regex cannot contain brackets while you are trying to recurse."); } } - - fieldDelimiterPattern = Pattern.compile(keyValueProcessorConfig.getFieldDelimiterRegex()); } else { String regex; if (keyValueProcessorConfig.getFieldSplitCharacters().isEmpty()) { regex = KeyValueProcessorConfig.DEFAULT_FIELD_SPLIT_CHARACTERS; } else { - if (keyValueProcessorConfig.getRecursive()) { - if (keyValueProcessorConfig.getFieldSplitCharacters().length() != 1) { - throw new IllegalArgumentException("The set field split characters is limited to one character only."); - } + if (keyValueProcessorConfig.getRecursive() + && keyValueProcessorConfig.getFieldSplitCharacters().length() != 1) { + throw new IllegalArgumentException("While recursive is true, + the set field split characters is limited to one character only."); } regex = buildRegexFromCharacters(keyValueProcessorConfig.getFieldSplitCharacters()); } + fieldDelimiterPattern = Pattern.compile(regex); + if (keyValueProcessorConfig.getRecursive()) { - if (Pattern.compile(regex).matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("The set field split characters cannot contain brackets while you are trying to recurse."); + if (fieldDelimiterPattern.matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("While recursive is true, + the set field split characters cannot contain brackets while you are trying to recurse."); } } - - fieldDelimiterPattern = Pattern.compile(regex); } if(keyValueProcessorConfig.getKeyValueDelimiterRegex() != null @@ -113,37 +111,36 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces throw new PatternSyntaxException("key_value_delimiter_regex is not a valid regex string", keyValueProcessorConfig.getKeyValueDelimiterRegex(), -1); } + keyValueDelimiterPattern = Pattern.compile(keyValueProcessorConfig.getKeyValueDelimiterRegex()); + if (keyValueProcessorConfig.getRecursive()) { - if (Pattern.compile(keyValueProcessorConfig.getKeyValueDelimiterRegex()).matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("The set key value delimiter regex cannot contain brackets while you are trying to recurse."); - } - if (keyValueProcessorConfig.getKeyValueDelimiterRegex().length() != 1) { - throw new IllegalArgumentException("The set key value delimiter regex is limited to one character only."); + if (keyValueDelimiterPattern.matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("While recursive is true, + the set key value delimiter regex cannot contain brackets while you are trying to recurse."); } } - - keyValueDelimiterPattern = Pattern.compile(keyValueProcessorConfig.getKeyValueDelimiterRegex()); } else { String regex; if (keyValueProcessorConfig.getValueSplitCharacters().isEmpty()) { regex = KeyValueProcessorConfig.DEFAULT_VALUE_SPLIT_CHARACTERS; } else { - if (keyValueProcessorConfig.getRecursive()) { - if (keyValueProcessorConfig.getValueSplitCharacters().length() != 1) { - throw new IllegalArgumentException("The set value split characters is limited to one character only."); - } + if (keyValueProcessorConfig.getRecursive() + && keyValueProcessorConfig.getValueSplitCharacters().length() != 1) { + throw new IllegalArgumentException("While recursive is true, + the set value split characters is limited to one character only."); } regex = buildRegexFromCharacters(keyValueProcessorConfig.getValueSplitCharacters()); } + keyValueDelimiterPattern = Pattern.compile(regex); + if (keyValueProcessorConfig.getRecursive()) { - if (Pattern.compile(regex).matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("The set value split characters cannot contain brackets while you are trying to recurse."); + if (keyValueDelimiterPattern.matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("While recursive is true, + the set value split characters cannot contain brackets while you are trying to recurse."); } } - - keyValueDelimiterPattern = Pattern.compile(regex); } if (!validateRegex(keyValueProcessorConfig.getDeleteKeyRegex())) { @@ -246,7 +243,7 @@ public Collection> doExecute(final Collection> recor final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); if (keyValueProcessorConfig.getRecursive()) { - ObjectMapper mapper = new ObjectMapper(); + private final ObjectMapper mapper = new ObjectMapper(); try { JsonNode recursedTree = recurse(groupsRaw, mapper); outputMap.putAll(createRecursedMap(recursedTree, mapper)); @@ -284,7 +281,7 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { } } - if (bracketStack.isEmpty() && input.charAt(i) == fieldDelimiterPattern.toString().charAt(0)) { + if (bracketStack.isEmpty() && fieldDelimiterPattern.matcher(input.charAt(i).matches())) { String pair = input.substring(pairStart, i); pairs.add(pair); pairStart = i + 1; @@ -300,14 +297,15 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { int valueEnd = -1; String keyString = ""; String valueString; + final Character whitespaceChar = ' '; bracketStack.clear(); for (int i = 0; i < pair.length(); i++) { - if (bracketStack.isEmpty() && pair.charAt(i) == keyValueDelimiterPattern.toString().charAt(0)) { + if (bracketStack.isEmpty() && keyValueDelimiterPattern.matcher(pair.charAt(i).matches())) { keyString = pair.substring(keyStart, i).stripTrailing(); valueStart = i + 1; - while(pair.charAt(valueStart) == ' ') { + while(pair.charAt(valueStart) == whitespaceChar) { valueStart++; } break; From c8950373502be1ae68badf8ba73e45a8c62b30ab Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Thu, 24 Aug 2023 23:57:53 +0000 Subject: [PATCH 12/13] fix errors Signed-off-by: Kat Shen --- .../processor/keyvalue/KeyValueProcessor.java | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index 14c734af04..5f27e92512 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -75,8 +75,7 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces if (keyValueProcessorConfig.getRecursive()) { if (fieldDelimiterPattern.matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("While recursive is true, - the set field delimiter regex cannot contain brackets while you are trying to recurse."); + throw new IllegalArgumentException("While recursive is true, the set field delimiter regex cannot contain brackets while you are trying to recurse."); } } } else { @@ -86,8 +85,7 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces } else { if (keyValueProcessorConfig.getRecursive() && keyValueProcessorConfig.getFieldSplitCharacters().length() != 1) { - throw new IllegalArgumentException("While recursive is true, - the set field split characters is limited to one character only."); + throw new IllegalArgumentException("While recursive is true, the set field split characters is limited to one character only."); } regex = buildRegexFromCharacters(keyValueProcessorConfig.getFieldSplitCharacters()); } @@ -96,8 +94,7 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces if (keyValueProcessorConfig.getRecursive()) { if (fieldDelimiterPattern.matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("While recursive is true, - the set field split characters cannot contain brackets while you are trying to recurse."); + throw new IllegalArgumentException("While recursive is true, the set field split characters cannot contain brackets while you are trying to recurse."); } } } @@ -115,8 +112,7 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces if (keyValueProcessorConfig.getRecursive()) { if (keyValueDelimiterPattern.matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("While recursive is true, - the set key value delimiter regex cannot contain brackets while you are trying to recurse."); + throw new IllegalArgumentException("While recursive is true, the set key value delimiter regex cannot contain brackets while you are trying to recurse."); } } } else { @@ -126,8 +122,7 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces } else { if (keyValueProcessorConfig.getRecursive() && keyValueProcessorConfig.getValueSplitCharacters().length() != 1) { - throw new IllegalArgumentException("While recursive is true, - the set value split characters is limited to one character only."); + throw new IllegalArgumentException("While recursive is true, the set value split characters is limited to one character only."); } regex = buildRegexFromCharacters(keyValueProcessorConfig.getValueSplitCharacters()); @@ -137,8 +132,7 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces if (keyValueProcessorConfig.getRecursive()) { if (keyValueDelimiterPattern.matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("While recursive is true, - the set value split characters cannot contain brackets while you are trying to recurse."); + throw new IllegalArgumentException("While recursive is true, the set value split characters cannot contain brackets while you are trying to recurse."); } } } @@ -236,6 +230,8 @@ private void validateKeySets(final Set includeSet, final Set exc @Override public Collection> doExecute(final Collection> records) { + final ObjectMapper mapper = new ObjectMapper(); + for(final Record record : records) { final Map outputMap = new HashMap<>(); final Event recordEvent = record.getData(); @@ -243,7 +239,6 @@ public Collection> doExecute(final Collection> recor final String[] groups = fieldDelimiterPattern.split(groupsRaw, 0); if (keyValueProcessorConfig.getRecursive()) { - private final ObjectMapper mapper = new ObjectMapper(); try { JsonNode recursedTree = recurse(groupsRaw, mapper); outputMap.putAll(createRecursedMap(recursedTree, mapper)); @@ -281,7 +276,7 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { } } - if (bracketStack.isEmpty() && fieldDelimiterPattern.matcher(input.charAt(i).matches())) { + if (bracketStack.isEmpty() && fieldDelimiterPattern.matcher(String.valueOf(input.charAt(i))).matches()) { String pair = input.substring(pairStart, i); pairs.add(pair); pairStart = i + 1; @@ -302,7 +297,7 @@ private ObjectNode recurse(String input, ObjectMapper mapper) { bracketStack.clear(); for (int i = 0; i < pair.length(); i++) { - if (bracketStack.isEmpty() && keyValueDelimiterPattern.matcher(pair.charAt(i).matches())) { + if (bracketStack.isEmpty() && keyValueDelimiterPattern.matcher(String.valueOf(pair.charAt(i))).matches()) { keyString = pair.substring(keyStart, i).stripTrailing(); valueStart = i + 1; while(pair.charAt(valueStart) == whitespaceChar) { From b4f62f93123a0782964361b6e41c2bb656933852 Mon Sep 17 00:00:00 2001 From: Kat Shen Date: Fri, 25 Aug 2023 19:33:30 +0000 Subject: [PATCH 13/13] fix nits Signed-off-by: Kat Shen --- .../processor/keyvalue/KeyValueProcessor.java | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java index 5f27e92512..fb11a3386d 100644 --- a/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java +++ b/data-prepper-plugins/key-value-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/keyvalue/KeyValueProcessor.java @@ -62,12 +62,12 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces super(pluginMetrics); this.keyValueProcessorConfig = keyValueProcessorConfig; - if(keyValueProcessorConfig.getFieldDelimiterRegex() != null + if (keyValueProcessorConfig.getFieldDelimiterRegex() != null && !keyValueProcessorConfig.getFieldDelimiterRegex().isEmpty()) { - if(keyValueProcessorConfig.getFieldSplitCharacters() != null + if (keyValueProcessorConfig.getFieldSplitCharacters() != null && !keyValueProcessorConfig.getFieldSplitCharacters().isEmpty()) { throw new IllegalArgumentException("field_delimiter_regex and field_split_characters cannot both be defined."); - } else if(!validateRegex(keyValueProcessorConfig.getFieldDelimiterRegex())) { + } else if (!validateRegex(keyValueProcessorConfig.getFieldDelimiterRegex())) { throw new PatternSyntaxException("field_delimiter_regex is not a valid regex string", keyValueProcessorConfig.getFieldDelimiterRegex(), -1); } @@ -92,16 +92,15 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces fieldDelimiterPattern = Pattern.compile(regex); - if (keyValueProcessorConfig.getRecursive()) { - if (fieldDelimiterPattern.matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("While recursive is true, the set field split characters cannot contain brackets while you are trying to recurse."); - } + if (keyValueProcessorConfig.getRecursive() + && fieldDelimiterPattern.matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("While recursive is true, the set field split characters cannot contain brackets while you are trying to recurse."); } } - if(keyValueProcessorConfig.getKeyValueDelimiterRegex() != null + if (keyValueProcessorConfig.getKeyValueDelimiterRegex() != null && !keyValueProcessorConfig.getKeyValueDelimiterRegex().isEmpty()) { - if(keyValueProcessorConfig.getValueSplitCharacters() != null + if (keyValueProcessorConfig.getValueSplitCharacters() != null && !keyValueProcessorConfig.getValueSplitCharacters().isEmpty()) { throw new IllegalArgumentException("key_value_delimiter_regex and value_split_characters cannot both be defined."); } else if (!validateRegex(keyValueProcessorConfig.getKeyValueDelimiterRegex())) { @@ -110,10 +109,9 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces keyValueDelimiterPattern = Pattern.compile(keyValueProcessorConfig.getKeyValueDelimiterRegex()); - if (keyValueProcessorConfig.getRecursive()) { - if (keyValueDelimiterPattern.matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("While recursive is true, the set key value delimiter regex cannot contain brackets while you are trying to recurse."); - } + if (keyValueProcessorConfig.getRecursive() + && keyValueDelimiterPattern.matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("While recursive is true, the set key value delimiter regex cannot contain brackets while you are trying to recurse."); } } else { String regex; @@ -130,10 +128,9 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces keyValueDelimiterPattern = Pattern.compile(regex); - if (keyValueProcessorConfig.getRecursive()) { - if (keyValueDelimiterPattern.matcher(delimiterBracketCheck).matches()) { - throw new IllegalArgumentException("While recursive is true, the set value split characters cannot contain brackets while you are trying to recurse."); - } + if (keyValueProcessorConfig.getRecursive() + && keyValueDelimiterPattern.matcher(delimiterBracketCheck).matches()) { + throw new IllegalArgumentException("While recursive is true, the set value split characters cannot contain brackets while you are trying to recurse."); } } @@ -188,7 +185,7 @@ private String buildRegexFromCharacters(String s) { char[] splitters = s.toCharArray(); StringBuilder regexedFieldSplitCharacters = new StringBuilder(); for(char c : splitters) { - if(Objects.equals(c, '\\')) { + if (Objects.equals(c, '\\')) { regexedFieldSplitCharacters.append(c); } else { regexedFieldSplitCharacters.append(c).append('|'); @@ -202,7 +199,7 @@ private String buildRegexFromCharacters(String s) { private boolean validateRegex(final String pattern) { - if(pattern != null && !Objects.equals(pattern, "")) { + if (pattern != null && !Objects.equals(pattern, "")) { try { Pattern.compile(pattern); } catch (PatternSyntaxException e) { @@ -257,7 +254,7 @@ public Collection> doExecute(final Collection> recor return records; } - private ObjectNode recurse(String input, ObjectMapper mapper) { + private ObjectNode recurse(final String input, final ObjectMapper mapper) { Stack bracketStack = new Stack(); Map bracketMap = initBracketMap(); int pairStart = 0; @@ -400,12 +397,12 @@ private Map executeConfigs(Map map) { continue; } - if(keyValueProcessorConfig.getDeleteKeyRegex() != null && !Objects.equals(keyValueProcessorConfig.getDeleteKeyRegex(), "")) { + if (keyValueProcessorConfig.getDeleteKeyRegex() != null && !Objects.equals(keyValueProcessorConfig.getDeleteKeyRegex(), "")) { key = key.replaceAll(keyValueProcessorConfig.getDeleteKeyRegex(), ""); } key = keyValueProcessorConfig.getPrefix() + key; - if(value != null + if (value != null && value instanceof String && keyValueProcessorConfig.getDeleteValueRegex() != null && !Objects.equals(keyValueProcessorConfig.getDeleteValueRegex(), "")) { @@ -470,7 +467,7 @@ private void addKeyValueToMap(final Map parsedMap, final String } } - if(!parsedMap.containsKey(key)) { + if (!parsedMap.containsKey(key)) { parsedMap.put(key, processedValue); return; }