Skip to content

Commit

Permalink
4602 one way hash (#4750)
Browse files Browse the repository at this point in the history
added capabilities for working with OneWay Hash

Signed-off-by: mishavay-aws <[email protected]>
  • Loading branch information
mishavay-aws authored Jul 30, 2024
1 parent 1ca5c69 commit 9e084a0
Show file tree
Hide file tree
Showing 11 changed files with 465 additions and 8 deletions.
53 changes: 52 additions & 1 deletion data-prepper-plugins/obfuscate-processor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ Below are the list of configuration options.
* `patterns` - (optional) - A list of Regex patterns. You can define multiple patterns for the same field. Only the
parts that matched the Regex patterns to be obfuscated. If not provided, the full field will be obfuscated.
* `single_word_only` - (optional) - When set to `true`, a word boundary `\b` is added to the pattern, due to which obfuscation would be applied only to words that are standalone in the input text. By default, it is `false`, meaning obfuscation patterns are applied to all occurrences.
* `action` - (optional) - Obfuscation action, default to `mask`. Currently, `mask` is the only supported action.
* `action` - (optional) - Obfuscation action to apply: `mask`, or `hash` to use one-way hashing. Defaults to `mask`.


### Configuration - Mask Action
Expand All @@ -75,6 +75,57 @@ There are some additional configuration options for Mask action.
* `mask_character_length` - (optional) - Default to 3. The value must be between 1 and 10. There will be n numbers of
obfuscation characters, e.g. '***'

### Configuration - One Way Hash Action

There are some additional configuration options for One Way Hash action.

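* `format` - (optional) - Hash algorithm to use. Defaults to `SHA-512`, which is currently the only accepted value.
* `salt` - (optional) - Salt value to use when generating the hash. Must be between 16 and 64 characters long. If not specified, a random salt is generated by the processor.
* `salt_key` - (optional) - Key of a field in the record whose value is used to compute a per-record salt. If the key is not found in a record, the `salt` from the pipeline configuration (or the randomly generated salt) is used for that record instead.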

```yaml
pipeline:
  source:
    http:
  processor:
    - obfuscate:
        source: "log"
        target: "new_log"
        patterns:
          - "[A-Za-z0-9+_.-]+@([\\w-]+\\.)+[\\w-]{2,4}"
        action:
          hash:
            salt_key: "/<key>"
            salt: "<salt>"
    - obfuscate:
        source: "phone"
        action:
          hash:
            salt: "<salt>"
  sink:
    - stdout:
```
Take the following input:
```json
{
  "id": 1,
  "phone": "(555) 555 5555",
  "log": "My name is Bob and my email address is abc@example.com"
}
```

When run, the processor will parse the message into the following output:

```json
{
  "id": 1,
  "phone": "<hash>",
  "log": "My name is Bob and my email address is abc@example.com",
  "new_log": "My name is Bob and my email address is <hash>"
}
```
---

## FAQ:
Expand Down
1 change: 1 addition & 0 deletions data-prepper-plugins/obfuscate-processor/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ dependencies {
implementation 'com.fasterxml.jackson.core:jackson-core'
implementation 'com.fasterxml.jackson.core:jackson-databind'
testImplementation project(':data-prepper-test-common')
testImplementation project(':data-prepper-test-event')
}

test {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public Collection<Record<Event>> doExecute(Collection<Record<Event>> records) {
String rawValue = recordEvent.get(source, String.class);

// Call obfuscation action
String newValue = this.action.obfuscate(rawValue, patterns);
String newValue = this.action.obfuscate(rawValue, patterns, record);

// No changes means it does not match any patterns
if (rawValue.equals(newValue)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import org.opensearch.dataprepper.model.annotations.DataPrepperPlugin;
import org.opensearch.dataprepper.model.annotations.DataPrepperPluginConstructor;
import org.opensearch.dataprepper.model.event.Event;
import org.opensearch.dataprepper.model.record.Record;

import java.util.List;
import java.util.regex.Pattern;
Expand All @@ -21,7 +23,7 @@ public MaskAction(final MaskActionConfig config) {
}

@Override
public String obfuscate(String source, List<Pattern> patterns) {
public String obfuscate(String source, List<Pattern> patterns, Record<Event> record) {

if (patterns == null || patterns.size() == 0) {
// This is to replace the whole field.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import java.util.List;
import java.util.regex.Pattern;

import org.opensearch.dataprepper.model.event.Event;
import org.opensearch.dataprepper.model.record.Record;


/**
* Interface represents a specific action to be taken for obfuscation.
Expand All @@ -20,7 +23,8 @@ public interface ObfuscationAction {
*
* @param source source string
* @param patterns a list of patterns to match
* @param record raw record
* @return obfuscated string
*/
String obfuscate(String source, List<Pattern> patterns);
String obfuscate(String source, List<Pattern> patterns, Record<Event> record);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.dataprepper.plugins.processor.obfuscation.action;

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.opensearch.dataprepper.model.annotations.DataPrepperPlugin;
import org.opensearch.dataprepper.model.annotations.DataPrepperPluginConstructor;
import org.opensearch.dataprepper.model.event.Event;
import org.opensearch.dataprepper.model.record.Record;
import org.opensearch.dataprepper.plugins.processor.obfuscation.ObfuscationProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.opensearch.dataprepper.model.event.EventKey;

/**
 * Obfuscation action that replaces text with a salted one-way hash, encoded as Base64.
 *
 * <p>The salt is taken, in order of preference, from the value of the configured
 * {@code salt_key} field of the record being processed, from the configured {@code salt},
 * or from a random salt generated once when this action is constructed.
 */
@DataPrepperPlugin(name = "hash", pluginType = ObfuscationAction.class, pluginConfigurationType = OneWayHashActionConfig.class)
public class OneWayHashAction implements ObfuscationAction {

    // NOTE(review): the logger is registered under ObfuscationProcessor, so messages from this
    // action are reported under the processor's logger name - confirm this is intended.
    private static final Logger LOG = LoggerFactory.getLogger(ObfuscationProcessor.class);

    /** Length, in bytes, of generated salts and of salts derived from a record value. */
    private static final int SALT_LENGTH_BYTES = 64;

    /** Template digest; it is never updated directly, every hash works on a clone of it. */
    private final MessageDigest messageDigest;
    private final byte[] salt;
    private final EventKey saltKey;

    /**
     * Creates the action from its plugin configuration.
     *
     * @param config hash action configuration
     * @throws RuntimeException if the configured hash format is not a known digest algorithm
     */
    @DataPrepperPluginConstructor
    public OneWayHashAction(final OneWayHashActionConfig config) {

        this.saltKey = config.getSaltKey();

        if (config.getSalt() == null || config.getSalt().isEmpty()) {
            this.salt = generateSalt();
        } else {
            this.salt = config.getSalt().getBytes(StandardCharsets.UTF_8);
        }

        try {
            messageDigest = MessageDigest.getInstance(config.getFormat());
        } catch (NoSuchAlgorithmException noSuchAlgorithmException) {
            // The exception is passed as the trailing argument without its own placeholder so
            // that it is logged as a throwable rather than formatted into the message.
            LOG.error("The hash format provided ({}) is not a known algorithm", config.getFormat(), noSuchAlgorithmException);
            throw new RuntimeException(noSuchAlgorithmException);
        }
    }

    /**
     * Hashes the whole source string when no patterns are given, otherwise hashes every
     * match of every pattern. Patterns are applied one after another, each on the output
     * of the previous one.
     *
     * @param source   source string
     * @param patterns a list of patterns to match; {@code null} or empty to hash the whole string
     * @param record   raw record, consulted only when a salt key is configured
     * @return obfuscated string
     */
    @Override
    public String obfuscate(String source, List<Pattern> patterns, Record<Event> record) {

        final byte[] saltToApply = resolveSalt(record);

        if (patterns == null || patterns.isEmpty()) {
            // no pattern to match, replace the whole string
            return oneWayHashString(source, saltToApply);
        }

        String replacementString = source;

        for (Pattern pattern : patterns) {
            // Use the supplied compiled pattern directly: recompiling from pattern.pattern()
            // would discard its flags and repeat the compilation for every record.
            final Matcher matcher = pattern.matcher(replacementString);
            final StringBuffer stringBuffer = new StringBuffer();

            while (matcher.find()) {
                final String hashed = oneWayHashString(matcher.group(), saltToApply);
                // Quote the replacement so it is always inserted literally.
                matcher.appendReplacement(stringBuffer, Matcher.quoteReplacement(hashed));
            }

            matcher.appendTail(stringBuffer);
            replacementString = stringBuffer.toString();
        }
        return replacementString;
    }

    /**
     * Resolves the salt for a record. When a salt key is configured and the record holds a
     * non-null value for it, the salt is derived from that value; otherwise the configured
     * (or generated) salt is used.
     */
    private byte[] resolveSalt(Record<Event> record) {

        if (saltKey == null) {
            return salt;
        }

        final Event recordEvent = record.getData();

        if (recordEvent.containsKey(saltKey)) {
            final String saltValue = recordEvent.get(saltKey, String.class);
            if (saltValue != null) {
                return computeSaltBasedOnKeyValue(saltValue);
            }
        }

        LOG.info("Unable to find a key '{}' for using as salt, using default salt pipeline configuration for the record instead", saltKey);
        return salt;
    }

    /** Returns the Base64 encoding of digest(salt followed by the UTF-8 bytes of source). */
    private String oneWayHashString(String source, byte[] salt) {

        try {
            final MessageDigest messageDigestClone = (MessageDigest) messageDigest.clone();

            messageDigestClone.update(salt);
            final byte[] bytes = messageDigestClone.digest(source.getBytes(StandardCharsets.UTF_8));

            return Base64.getEncoder().encodeToString(bytes);

        } catch (CloneNotSupportedException cloneNotSupportedException) {
            LOG.error("There was an exception while processing Event", cloneNotSupportedException);
            throw new RuntimeException(cloneNotSupportedException);
        }
    }

    /**
     * Derives a fixed-length salt from a record value: the UTF-8 bytes of the value,
     * truncated to the salt length or padded at the end with {@link Byte#MIN_VALUE}.
     */
    private byte[] computeSaltBasedOnKeyValue(String saltValue) {

        final byte[] value = saltValue.getBytes(StandardCharsets.UTF_8);
        final byte[] result = new byte[SALT_LENGTH_BYTES];

        Arrays.fill(result, Byte.MIN_VALUE);
        System.arraycopy(value, 0, result, 0, Math.min(value.length, result.length));

        return result;
    }

    /** Generates a random salt using {@link SecureRandom}. */
    private byte[] generateSalt() {

        final byte[] saltBytes = new byte[SALT_LENGTH_BYTES];
        new SecureRandom().nextBytes(saltBytes);
        return saltBytes;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.dataprepper.plugins.processor.obfuscation.action;

import org.opensearch.dataprepper.model.event.EventKeyConfiguration;
import org.opensearch.dataprepper.model.event.EventKeyFactory;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyDescription;

import jakarta.validation.constraints.Pattern;
import jakarta.validation.constraints.Size;

import org.opensearch.dataprepper.model.event.EventKey;
import org.opensearch.dataprepper.model.event.EventKeyConfiguration;
import org.opensearch.dataprepper.model.event.EventKeyFactory;


/**
 * Configuration options of the one way hash obfuscation action.
 */
public class OneWayHashActionConfig {

    @JsonProperty("salt")
    @JsonPropertyDescription("Salt value to use when generating hash. If not specified, salt will be randomly generated by the processor.")
    @Size(min = 16, message = "Minimum size of salt string is 16.")
    @Size(max = 64, message = "Maximum size of salt string is 64")
    private String salt;

    // SHA-512 is the only value accepted by the validation pattern below.
    @JsonProperty("format")
    @Pattern(regexp = "SHA-512", message = "Valid values: SHA-512")
    @JsonPropertyDescription("Format of one way hash to generate. Default to SHA-512.")
    private String format = "SHA-512";

    // The two description fragments are concatenated, so the first one must end with a space.
    @JsonProperty("salt_key")
    @JsonPropertyDescription("A key to compute salt based on a value provided as part of a record. " +
            "If key or value was not found in the record(s), a salt defined in the pipeline configuration will be used instead.")
    @EventKeyConfiguration(EventKeyFactory.EventAction.GET)
    private EventKey saltKey;

    public OneWayHashActionConfig() {

    }

    /**
     * @return the configured salt, or {@code null} when none was configured
     */
    public String getSalt() {
        return salt;
    }

    /**
     * @return the hash format; {@code "SHA-512"} unless overridden
     */
    public String getFormat() {
        return format;
    }

    /**
     * @return the key whose record value is used to compute the salt, or {@code null} when none was configured
     */
    public EventKey getSaltKey() {
        return saltKey;
    }

}

Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,12 @@ void testBasicProcessor(String message) {
void testProcessorWithDifferentAction() {
final PluginModel mockModel = mock(PluginModel.class);
final ObfuscationAction mockAction = mock(ObfuscationAction.class);

when(mockModel.getPluginName()).thenReturn("mock");
when(mockModel.getPluginSettings()).thenReturn(new HashMap<>());
when(mockConfig.getAction()).thenReturn(mockModel);
when(mockConfig.getTarget()).thenReturn("");
when(mockAction.obfuscate(anyString(), anyList())).thenReturn("abc");
when(mockAction.obfuscate(anyString(), anyList(),any())).thenReturn("abc");

when(mockFactory.loadPlugin(eq(ObfuscationAction.class), any(PluginSetting.class)))
.thenReturn(mockAction);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;

class MaskActionTest {
class MaskActionTest implements ObfuscationActionTest {

private MaskAction maskAction;

Expand All @@ -25,7 +25,7 @@ class MaskActionTest {
void testObfuscateWithPatternAsNull() {
String message = "Hello";
maskAction = createMaskAction("*", 3);
String result = maskAction.obfuscate(message, null);
String result = maskAction.obfuscate(message, null, createRecord(message));
assertThat(result, equalTo("***"));
}

Expand All @@ -39,7 +39,7 @@ void testObfuscateWithPatternAsNull() {
void testObfuscateWithDifferentConfig(String message, String maskCharacter, int maskCharacterLength, String expected) {
maskAction = createMaskAction(maskCharacter, maskCharacterLength);
List<Pattern> patterns = new ArrayList<>();
String result = maskAction.obfuscate(message, patterns);
String result = maskAction.obfuscate(message, patterns,createRecord(message));
assertThat(result, equalTo(expected));
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.dataprepper.plugins.processor.obfuscation.action;

import java.util.HashMap;
import java.util.Map;

import org.opensearch.dataprepper.event.TestEventFactory;
import org.opensearch.dataprepper.model.event.EventBuilder;
import org.opensearch.dataprepper.model.event.Event;
import org.opensearch.dataprepper.model.record.Record;

/**
 * Shared helper for obfuscation action tests: builds the records passed to the action under test.
 */
interface ObfuscationActionTest {

    /**
     * Builds a record wrapping an event of type {@code "event"} whose data holds the given
     * text under the {@code "message"} key.
     *
     * @param message value stored under the {@code "message"} key
     * @return a record wrapping the built event
     */
    default Record<Event> createRecord(String message) {
        final Map<String, Object> payload = new HashMap<>();
        payload.put("message", message);

        final Event event = TestEventFactory.getTestEventFactory()
                .eventBuilder(EventBuilder.class)
                .withEventType("event")
                .withData(payload)
                .build();

        return new Record<>(event);
    }
}
Loading

0 comments on commit 9e084a0

Please sign in to comment.