Documentation improvements for a number of Data Prepper processors. (#5025)

Adds missing enumerations in the key-value processor to support better documentation. Corrects @JsonClassDescription to use HTML rather than Markdown. This set of changes is for key_value, flatten, translate, parse_json, parse_xml, parse_ion, and csv. Also, this adds documentation to the csv input codec.

Signed-off-by: David Venable <[email protected]>
dlvenable authored Oct 7, 2024
1 parent 68a8184 commit af304e6
Showing 15 changed files with 345 additions and 122 deletions.
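
The description strings in this diff are ordinary Jackson annotations with runtime retention, which is what allows them to feed generated documentation; the switch from Markdown backticks to <code> tags suggests the downstream tooling renders the values as HTML. As a rough illustration only (no such utility is part of this commit), the values can be read back with reflection:

import com.fasterxml.jackson.annotation.JsonClassDescription;
import com.fasterxml.jackson.annotation.JsonPropertyDescription;

import java.lang.reflect.Field;

// Hypothetical helper, not part of this commit: prints the documentation strings
// that a docs generator could collect from a processor configuration class.
public class ConfigDescriptionDump {
    public static void main(String[] args) throws ClassNotFoundException {
        final Class<?> configClass = Class.forName(
                "org.opensearch.dataprepper.plugins.processor.csv.CsvProcessorConfig");

        final JsonClassDescription classDescription =
                configClass.getAnnotation(JsonClassDescription.class);
        if (classDescription != null) {
            // After this commit the value contains HTML such as <code>csv</code>.
            System.out.println(configClass.getSimpleName() + ": " + classDescription.value());
        }

        for (final Field field : configClass.getDeclaredFields()) {
            final JsonPropertyDescription propertyDescription =
                    field.getAnnotation(JsonPropertyDescription.class);
            if (propertyDescription != null) {
                System.out.println("  " + field.getName() + ": " + propertyDescription.value());
            }
        }
    }
}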
CsvOutputCodecConfig.java
@@ -6,21 +6,24 @@

import com.fasterxml.jackson.annotation.JsonClassDescription;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyDescription;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import jakarta.validation.Valid;
import jakarta.validation.constraints.Size;

import java.util.List;

@JsonPropertyOrder
@JsonClassDescription("The `csv` processor parses comma-separated values (CSVs) from the event into columns.")
@JsonClassDescription("The <code>csv</code> codec parses comma-separated values (CSVs) content into events from that content.")
public class CsvOutputCodecConfig {
static final String DEFAULT_DELIMITER = ",";

@JsonProperty("delimiter")
@JsonPropertyDescription("The character separating each column. Default value is <code>,</code>.")
private String delimiter = DEFAULT_DELIMITER;

@JsonProperty("header")
@JsonPropertyDescription("User-specified names for the CSV columns.")
private List<String> header;

@Valid
@@ -32,10 +35,12 @@ public class CsvOutputCodecConfig {
@Size(max = 0, message = "Header from file is not supported.")
@JsonProperty("region")
private String region;

@Valid
@Size(max = 0, message = "Header from file is not supported.")
@JsonProperty("bucket_name")
private String bucketName;

@Valid
@Size(max = 0, message = "Header from file is not supported.")
@JsonProperty("fileKey")
CsvProcessorConfig.java
@@ -5,6 +5,7 @@

package org.opensearch.dataprepper.plugins.processor.csv;

import com.fasterxml.jackson.annotation.JsonClassDescription;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyDescription;
import jakarta.validation.constraints.AssertTrue;
@@ -14,6 +15,7 @@
/**
* Configuration class for {@link CsvProcessor}.
*/
@JsonClassDescription("The <code>csv</code> processor parses comma-separated values (CSVs) strings into structured data.")
public class CsvProcessorConfig {
static final String DEFAULT_SOURCE = "message";
static final String DEFAULT_DELIMITER = ",";
@@ -40,8 +42,8 @@ public class CsvProcessorConfig {

@JsonProperty("column_names_source_key")
@JsonPropertyDescription("The field in the event that specifies the CSV column names, which will be " +
"automatically detected. If there need to be extra column names, the column names are automatically " +
"generated according to their index. If <code>column_names</code> is also defined, the header in " +
"automatically detected. If there are additional columns in the <code>source</code>, the column names are automatically " +
"generated according to column index. If <code>column_names</code> is also defined, the header in " +
"<code>column_names_source_key</code> can also be used to generate the event fields. " +
"If too few columns are specified in this field, the remaining column names are automatically generated. " +
"If too many column names are specified in this field, the CSV processor omits the extra column names.")
@@ -57,9 +59,8 @@ public class CsvProcessorConfig {
private List<String> columnNames;

@JsonProperty("csv_when")
@JsonPropertyDescription("Allows you to specify a Data Prepper <a href=\"https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/\">conditional expression</a>, " +
"such as <code>/some-key == \"test\"</code>, that will be evaluated to determine whether " +
"the processor should be applied to the event.")
@JsonPropertyDescription("A <a href=\"https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/\">conditional expression</a> such as <code>/some_key == \"test\"</code>. " +
"If specified, the <code>csv</code> processor will only run on events when the expression evaluates to true. ")
private String csvWhen;

@JsonPropertyDescription("If true, the configured source field will be deleted after the CSV data is parsed into separate fields.")
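
The column_names and column_names_source_key descriptions above state that missing column names are generated automatically from the column index and that surplus names are dropped. A minimal sketch of that reconciliation, assuming a generated-name format of "column" plus the index (the format is an assumption; the processor's actual code is not part of this excerpt):

import java.util.ArrayList;
import java.util.List;

// Sketch only: pads or truncates the configured column names to match the number
// of columns detected in the source field.
class ColumnNameSketch {
    static List<String> resolveColumnNames(final List<String> configuredNames, final int columnCount) {
        final List<String> resolved = new ArrayList<>();
        for (int i = 0; i < columnCount; i++) {
            if (i < configuredNames.size()) {
                resolved.add(configuredNames.get(i));   // user-specified name
            } else {
                resolved.add("column" + i);             // assumed auto-generated name
            }
        }
        return resolved;
    }
}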
FlattenProcessorConfig.java
@@ -17,7 +17,7 @@
import java.util.List;

@JsonPropertyOrder
@JsonClassDescription("The `flatten` processor transforms nested objects inside of events into flattened structures.")
@JsonClassDescription("The <code>flatten</code> processor transforms nested objects inside of events into flattened structures.")
public class FlattenProcessorConfig {

private static final List<String> DEFAULT_EXCLUDE_KEYS = new ArrayList<>();
@@ -35,7 +35,8 @@ public class FlattenProcessorConfig {
private String target;

@JsonProperty("remove_processed_fields")
@JsonPropertyDescription("When <code>true</code>, the processor removes all processed fields from the source. Default is <code>false</code>.")
@JsonPropertyDescription("When <code>true</code>, the processor removes all processed fields from the source. " +
"The default is <code>false</code> which leaves the source fields.")
private boolean removeProcessedFields = false;

@JsonProperty("remove_list_indices")
@@ -50,13 +51,12 @@

@JsonProperty("exclude_keys")
@JsonPropertyDescription("The keys from the source field that should be excluded from processing. " +
"Default is an empty list (<code>[]</code>).")
"By default no keys are excluded.")
private List<String> excludeKeys = DEFAULT_EXCLUDE_KEYS;

@JsonProperty("flatten_when")
@JsonPropertyDescription("A Data Prepper <a href=\"https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/\">conditional expression</a>, " +
"such as <code>/some-key == \"test\"'</code>, that determines whether the <code>flatten</code> processor will be run on the " +
"event. Default is <code>null</code>, which means that all events will be processed unless otherwise stated.")
@JsonPropertyDescription("A <a href=\"https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/\">conditional expression</a> such as <code>/some_key == \"test\"</code>. " +
"If specified, the <code>flatten</code> processor will only run on events when the expression evaluates to true. ")
private String flattenWhen;

@JsonProperty("tags_on_failure")
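
For context on the flatten processor's class description above, a conceptual sketch of the flattening itself: nested maps become dotted top-level keys, so {"a": {"b": 1}} becomes {"a.b": 1}. This illustrates the idea only and is not the processor's implementation:

import java.util.LinkedHashMap;
import java.util.Map;

// Sketch only: recursively flattens nested maps into dot-separated keys.
class FlattenSketch {
    static Map<String, Object> flatten(final String prefix, final Map<String, Object> source) {
        final Map<String, Object> flattened = new LinkedHashMap<>();
        for (final Map.Entry<String, Object> entry : source.entrySet()) {
            final String key = prefix.isEmpty() ? entry.getKey() : prefix + "." + entry.getKey();
            final Object value = entry.getValue();
            if (value instanceof Map) {
                @SuppressWarnings("unchecked")
                final Map<String, Object> nested = (Map<String, Object>) value;
                flattened.putAll(flatten(key, nested));
            } else {
                flattened.put(key, value);
            }
        }
        return flattened;
    }
}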
KeyValueProcessor.java
@@ -55,10 +55,6 @@ public class KeyValueProcessor extends AbstractProcessor<Record<Event>, Record<E
private final Set<String> excludeKeysSet = new HashSet<String>();
private final HashMap<String, Object> defaultValuesMap = new HashMap<>();
private final Set<String> defaultValuesSet = new HashSet<String>();
private final String lowercaseKey = "lowercase";
private final String uppercaseKey = "uppercase";
private final String capitalizeKey = "capitalize";
private final Set<String> validTransformOptionSet = Set.of("", lowercaseKey, uppercaseKey, capitalizeKey);
private final String whitespaceStrict = "strict";
private final String whitespaceLenient = "lenient";
private final Set<String> validWhitespaceSet = Set.of(whitespaceLenient, whitespaceStrict);
@@ -167,14 +163,6 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics,

validateKeySets(includeKeysSet, excludeKeysSet, defaultValuesSet);

if (!validTransformOptionSet.contains(keyValueProcessorConfig.getTransformKey())) {
throw new IllegalArgumentException(String.format("The transform_key value: %s is not a valid option", keyValueProcessorConfig.getTransformKey()));
}

if (!(validWhitespaceSet.contains(keyValueProcessorConfig.getWhitespace()))) {
throw new IllegalArgumentException(String.format("The whitespace value: %s is not a valid option", keyValueProcessorConfig.getWhitespace()));
}

final Pattern boolCheck = Pattern.compile("true|false", Pattern.CASE_INSENSITIVE);
final Matcher duplicateValueBoolMatch = boolCheck.matcher(String.valueOf(keyValueProcessorConfig.getSkipDuplicateValues()));
final Matcher removeBracketsBoolMatch = boolCheck.matcher(String.valueOf(keyValueProcessorConfig.getRemoveBrackets()));
@@ -596,14 +584,14 @@ private Map<String, Object> executeConfigs(Map<String, Object> map) {
value = ((String)value).replaceAll(keyValueProcessorConfig.getDeleteValueRegex(), "");
}

if (keyValueProcessorConfig.getWhitespace().equals(whitespaceStrict)) {
if (keyValueProcessorConfig.getWhitespace() == WhitespaceOption.STRICT) {
String[] whitespace_arr = trimWhitespace(key, value);
key = whitespace_arr[0];
value = whitespace_arr[1];
}

if (keyValueProcessorConfig.getTransformKey() != null
&& !keyValueProcessorConfig.getTransformKey().isEmpty()) {
&& keyValueProcessorConfig.getTransformKey() != TransformOption.NONE) {
key = transformKey(key);
}

@@ -636,14 +624,7 @@ private String[] trimWhitespace(String key, Object value) {
}

private String transformKey(String key) {
if (keyValueProcessorConfig.getTransformKey().equals(lowercaseKey)) {
key = key.toLowerCase();
} else if (keyValueProcessorConfig.getTransformKey().equals(capitalizeKey)) {
key = key.substring(0, 1).toUpperCase() + key.substring(1);
} else if (keyValueProcessorConfig.getTransformKey().equals(uppercaseKey)) {
key = key.toUpperCase();
}
return key;
return keyValueProcessorConfig.getTransformKey().getTransformFunction().apply(key);
}

private boolean validKeyAndValue(String key, Object value) {
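
The updated KeyValueProcessor code above compares getTransformKey() against TransformOption.NONE and applies getTransformFunction(), and it compares getWhitespace() against WhitespaceOption.STRICT; these are the new enumerations the commit message mentions, defined in files that are not shown in this excerpt. A plausible shape for TransformOption, reconstructed from the removed lowercase/uppercase/capitalize string handling (the actual definition in the commit may differ):

import java.util.function.Function;

// Reconstructed sketch of the enum this processor now relies on; not the
// committed source.
enum TransformOption {
    NONE("", key -> key),
    LOWERCASE("lowercase", String::toLowerCase),
    UPPERCASE("uppercase", String::toUpperCase),
    CAPITALIZE("capitalize", key -> key.isEmpty()
            ? key
            : key.substring(0, 1).toUpperCase() + key.substring(1));

    private final String optionName;   // value as written in pipeline configuration
    private final Function<String, String> transformFunction;

    TransformOption(final String optionName, final Function<String, String> transformFunction) {
        this.optionName = optionName;
        this.transformFunction = transformFunction;
    }

    public String getOptionName() {
        return optionName;
    }

    public Function<String, String> getTransformFunction() {
        return transformFunction;
    }
}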