Documentation improvements for a number of Data Prepper processors. (#5025)

Adds missing enumerations in the key-value processor to support better documentation. Corrects @JsonClassDescription to use HTML rather than Markdown. This set of changes is for key_value, flatten, translate, parse_json, parse_xml, parse_ion, and csv. Also, this adds documentation to the csv input codec.

Signed-off-by: David Venable <[email protected]>
dlvenable authored Oct 7, 2024
1 parent 68a8184 commit af304e6
Showing 15 changed files with 345 additions and 122 deletions.
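
The description strings in this diff are ordinary Jackson annotations with runtime retention, which is what allows them to feed generated documentation; the switch from Markdown backticks to <code> tags suggests the downstream tooling renders the values as HTML. As a rough illustration only (no such utility is part of this commit), the values can be read back with reflection:

import com.fasterxml.jackson.annotation.JsonClassDescription;
import com.fasterxml.jackson.annotation.JsonPropertyDescription;

import java.lang.reflect.Field;

// Hypothetical helper, not part of this commit: prints the documentation strings
// that a docs generator could collect from a processor configuration class.
public class ConfigDescriptionDump {
    public static void main(String[] args) throws ClassNotFoundException {
        final Class<?> configClass = Class.forName(
                "org.opensearch.dataprepper.plugins.processor.csv.CsvProcessorConfig");

        final JsonClassDescription classDescription =
                configClass.getAnnotation(JsonClassDescription.class);
        if (classDescription != null) {
            // After this commit the value contains HTML such as <code>csv</code>.
            System.out.println(configClass.getSimpleName() + ": " + classDescription.value());
        }

        for (final Field field : configClass.getDeclaredFields()) {
            final JsonPropertyDescription propertyDescription =
                    field.getAnnotation(JsonPropertyDescription.class);
            if (propertyDescription != null) {
                System.out.println("  " + field.getName() + ": " + propertyDescription.value());
            }
        }
    }
}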
CsvOutputCodecConfig.java
@@ -6,21 +6,24 @@

import com.fasterxml.jackson.annotation.JsonClassDescription;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyDescription;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import jakarta.validation.Valid;
import jakarta.validation.constraints.Size;

import java.util.List;

@JsonPropertyOrder
@JsonClassDescription("The `csv` processor parses comma-separated values (CSVs) from the event into columns.")
@JsonClassDescription("The <code>csv</code> codec parses comma-separated values (CSVs) content into events from that content.")
public class CsvOutputCodecConfig {
static final String DEFAULT_DELIMITER = ",";

@JsonProperty("delimiter")
@JsonPropertyDescription("The character separating each column. Default value is <code>,</code>.")
private String delimiter = DEFAULT_DELIMITER;

@JsonProperty("header")
@JsonPropertyDescription("User-specified names for the CSV columns.")
private List<String> header;

@Valid
@@ -32,10 +35,12 @@ public class CsvOutputCodecConfig {
@Size(max = 0, message = "Header from file is not supported.")
@JsonProperty("region")
private String region;

@Valid
@Size(max = 0, message = "Header from file is not supported.")
@JsonProperty("bucket_name")
private String bucketName;

@Valid
@Size(max = 0, message = "Header from file is not supported.")
@JsonProperty("fileKey")
CsvProcessorConfig.java
@@ -5,6 +5,7 @@

package org.opensearch.dataprepper.plugins.processor.csv;

import com.fasterxml.jackson.annotation.JsonClassDescription;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyDescription;
import jakarta.validation.constraints.AssertTrue;
@@ -14,6 +15,7 @@
/**
* Configuration class for {@link CsvProcessor}.
*/
@JsonClassDescription("The <code>csv</code> processor parses comma-separated values (CSVs) strings into structured data.")
public class CsvProcessorConfig {
static final String DEFAULT_SOURCE = "message";
static final String DEFAULT_DELIMITER = ",";
@@ -40,8 +42,8 @@ public class CsvProcessorConfig {

@JsonProperty("column_names_source_key")
@JsonPropertyDescription("The field in the event that specifies the CSV column names, which will be " +
"automatically detected. If there need to be extra column names, the column names are automatically " +
"generated according to their index. If <code>column_names</code> is also defined, the header in " +
"automatically detected. If there are additional columns in the <code>source</code>, the column names are automatically " +
"generated according to column index. If <code>column_names</code> is also defined, the header in " +
"<code>column_names_source_key</code> can also be used to generate the event fields. " +
"If too few columns are specified in this field, the remaining column names are automatically generated. " +
"If too many column names are specified in this field, the CSV processor omits the extra column names.")
@@ -57,9 +59,8 @@ public class CsvProcessorConfig {
private List<String> columnNames;

@JsonProperty("csv_when")
@JsonPropertyDescription("Allows you to specify a Data Prepper <a href=\"https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/\">conditional expression</a>, " +
"such as <code>/some-key == \"test\"</code>, that will be evaluated to determine whether " +
"the processor should be applied to the event.")
@JsonPropertyDescription("A <a href=\"https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/\">conditional expression</a> such as <code>/some_key == \"test\"</code>. " +
"If specified, the <code>csv</code> processor will only run on events when the expression evaluates to true. ")
private String csvWhen;

@JsonPropertyDescription("If true, the configured source field will be deleted after the CSV data is parsed into separate fields.")
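
The column_names and column_names_source_key descriptions above state that missing column names are generated automatically from the column index and that surplus names are dropped. A minimal sketch of that reconciliation, assuming a generated-name format of "column" plus the index (the format is an assumption; the processor's actual code is not part of this excerpt):

import java.util.ArrayList;
import java.util.List;

// Sketch only: pads or truncates the configured column names to match the number
// of columns detected in the source field.
class ColumnNameSketch {
    static List<String> resolveColumnNames(final List<String> configuredNames, final int columnCount) {
        final List<String> resolved = new ArrayList<>();
        for (int i = 0; i < columnCount; i++) {
            if (i < configuredNames.size()) {
                resolved.add(configuredNames.get(i));   // user-specified name
            } else {
                resolved.add("column" + i);             // assumed auto-generated name
            }
        }
        return resolved;
    }
}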
FlattenProcessorConfig.java
@@ -17,7 +17,7 @@
import java.util.List;

@JsonPropertyOrder
@JsonClassDescription("The `flatten` processor transforms nested objects inside of events into flattened structures.")
@JsonClassDescription("The <code>flatten</code> processor transforms nested objects inside of events into flattened structures.")
public class FlattenProcessorConfig {

private static final List<String> DEFAULT_EXCLUDE_KEYS = new ArrayList<>();
@@ -35,7 +35,8 @@ public class FlattenProcessorConfig {
private String target;

@JsonProperty("remove_processed_fields")
@JsonPropertyDescription("When <code>true</code>, the processor removes all processed fields from the source. Default is <code>false</code>.")
@JsonPropertyDescription("When <code>true</code>, the processor removes all processed fields from the source. " +
"The default is <code>false</code> which leaves the source fields.")
private boolean removeProcessedFields = false;

@JsonProperty("remove_list_indices")
@@ -50,13 +51,12 @@

@JsonProperty("exclude_keys")
@JsonPropertyDescription("The keys from the source field that should be excluded from processing. " +
"Default is an empty list (<code>[]</code>).")
"By default no keys are excluded.")
private List<String> excludeKeys = DEFAULT_EXCLUDE_KEYS;

@JsonProperty("flatten_when")
@JsonPropertyDescription("A Data Prepper <a href=\"https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/\">conditional expression</a>, " +
"such as <code>/some-key == \"test\"'</code>, that determines whether the <code>flatten</code> processor will be run on the " +
"event. Default is <code>null</code>, which means that all events will be processed unless otherwise stated.")
@JsonPropertyDescription("A <a href=\"https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/\">conditional expression</a> such as <code>/some_key == \"test\"</code>. " +
"If specified, the <code>flatten</code> processor will only run on events when the expression evaluates to true. ")
private String flattenWhen;

@JsonProperty("tags_on_failure")
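
For context on the flatten processor's class description above, a conceptual sketch of the flattening itself: nested maps become dotted top-level keys, so {"a": {"b": 1}} becomes {"a.b": 1}. This illustrates the idea only and is not the processor's implementation:

import java.util.LinkedHashMap;
import java.util.Map;

// Sketch only: recursively flattens nested maps into dot-separated keys.
class FlattenSketch {
    static Map<String, Object> flatten(final String prefix, final Map<String, Object> source) {
        final Map<String, Object> flattened = new LinkedHashMap<>();
        for (final Map.Entry<String, Object> entry : source.entrySet()) {
            final String key = prefix.isEmpty() ? entry.getKey() : prefix + "." + entry.getKey();
            final Object value = entry.getValue();
            if (value instanceof Map) {
                @SuppressWarnings("unchecked")
                final Map<String, Object> nested = (Map<String, Object>) value;
                flattened.putAll(flatten(key, nested));
            } else {
                flattened.put(key, value);
            }
        }
        return flattened;
    }
}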
KeyValueProcessor.java
@@ -55,10 +55,6 @@ public class KeyValueProcessor extends AbstractProcessor<Record<Event>, Record<E
private final Set<String> excludeKeysSet = new HashSet<String>();
private final HashMap<String, Object> defaultValuesMap = new HashMap<>();
private final Set<String> defaultValuesSet = new HashSet<String>();
private final String lowercaseKey = "lowercase";
private final String uppercaseKey = "uppercase";
private final String capitalizeKey = "capitalize";
private final Set<String> validTransformOptionSet = Set.of("", lowercaseKey, uppercaseKey, capitalizeKey);
private final String whitespaceStrict = "strict";
private final String whitespaceLenient = "lenient";
private final Set<String> validWhitespaceSet = Set.of(whitespaceLenient, whitespaceStrict);
@@ -167,14 +163,6 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics,

validateKeySets(includeKeysSet, excludeKeysSet, defaultValuesSet);

if (!validTransformOptionSet.contains(keyValueProcessorConfig.getTransformKey())) {
throw new IllegalArgumentException(String.format("The transform_key value: %s is not a valid option", keyValueProcessorConfig.getTransformKey()));
}

if (!(validWhitespaceSet.contains(keyValueProcessorConfig.getWhitespace()))) {
throw new IllegalArgumentException(String.format("The whitespace value: %s is not a valid option", keyValueProcessorConfig.getWhitespace()));
}

final Pattern boolCheck = Pattern.compile("true|false", Pattern.CASE_INSENSITIVE);
final Matcher duplicateValueBoolMatch = boolCheck.matcher(String.valueOf(keyValueProcessorConfig.getSkipDuplicateValues()));
final Matcher removeBracketsBoolMatch = boolCheck.matcher(String.valueOf(keyValueProcessorConfig.getRemoveBrackets()));
@@ -596,14 +584,14 @@ private Map<String, Object> executeConfigs(Map<String, Object> map) {
value = ((String)value).replaceAll(keyValueProcessorConfig.getDeleteValueRegex(), "");
}

if (keyValueProcessorConfig.getWhitespace().equals(whitespaceStrict)) {
if (keyValueProcessorConfig.getWhitespace() == WhitespaceOption.STRICT) {
String[] whitespace_arr = trimWhitespace(key, value);
key = whitespace_arr[0];
value = whitespace_arr[1];
}

if (keyValueProcessorConfig.getTransformKey() != null
&& !keyValueProcessorConfig.getTransformKey().isEmpty()) {
&& keyValueProcessorConfig.getTransformKey() != TransformOption.NONE) {
key = transformKey(key);
}

@@ -636,14 +624,7 @@ private String[] trimWhitespace(String key, Object value) {
}

private String transformKey(String key) {
if (keyValueProcessorConfig.getTransformKey().equals(lowercaseKey)) {
key = key.toLowerCase();
} else if (keyValueProcessorConfig.getTransformKey().equals(capitalizeKey)) {
key = key.substring(0, 1).toUpperCase() + key.substring(1);
} else if (keyValueProcessorConfig.getTransformKey().equals(uppercaseKey)) {
key = key.toUpperCase();
}
return key;
return keyValueProcessorConfig.getTransformKey().getTransformFunction().apply(key);
}

private boolean validKeyAndValue(String key, Object value) {
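
The updated KeyValueProcessor code above compares getTransformKey() against TransformOption.NONE and applies getTransformFunction(), and it compares getWhitespace() against WhitespaceOption.STRICT; these are the new enumerations the commit message mentions, defined in files that are not shown in this excerpt. A plausible shape for TransformOption, reconstructed from the removed lowercase/uppercase/capitalize string handling (the actual definition in the commit may differ):

import java.util.function.Function;

// Reconstructed sketch of the enum this processor now relies on; not the
// committed source.
enum TransformOption {
    NONE("", key -> key),
    LOWERCASE("lowercase", String::toLowerCase),
    UPPERCASE("uppercase", String::toUpperCase),
    CAPITALIZE("capitalize", key -> key.isEmpty()
            ? key
            : key.substring(0, 1).toUpperCase() + key.substring(1));

    private final String optionName;   // value as written in pipeline configuration
    private final Function<String, String> transformFunction;

    TransformOption(final String optionName, final Function<String, String> transformFunction) {
        this.optionName = optionName;
        this.transformFunction = transformFunction;
    }

    public String getOptionName() {
        return optionName;
    }

    public Function<String, String> getTransformFunction() {
        return transformFunction;
    }
}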