Commit 91ff22d

Add metadata for primary key and primary key type and validations for Mongo/DocDb source (opensearch-project#4512)
* Add metadata for primary key and primary key type and validations for Mongo/DocDb source

Signed-off-by: Dinu John <[email protected]>

---------

Signed-off-by: Dinu John <[email protected]>
dinujoh authored May 7, 2024
1 parent 60ff9db commit 91ff22d
Showing 11 changed files with 84 additions and 44 deletions.
```diff
@@ -354,9 +354,9 @@ private boolean isJsonPath(String parameter) {
      */
     public String calculateDepth(String s3Prefix) {
         if(s3Prefix == null){
-            return Integer.toString(3);
+            return Integer.toString(4);
         }
-        return Integer.toString(s3Prefix.split("/").length + 3);
+        return Integer.toString(s3Prefix.split("/").length + 4);
     }

     public String getSourceCoordinationIdentifierEnvVariable(String s3Prefix){
```
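The depth value counts the `/`-separated segments of the generated S3 key prefix; it rises by one because the export path now gains an epoch-timestamp segment (see the `getS3PathPrefix()` hunk further down). A quick sanity check of the arithmetic, assuming a hypothetical two-segment user prefix:

```java
public class DepthCheck {
    public static void main(String[] args) {
        // Mirrors the updated calculateDepth: a null prefix maps to 4 fixed
        // segments, otherwise the user's prefix segments plus 4.
        String s3Prefix = "my/prefix"; // hypothetical user prefix
        int depth = (s3Prefix == null) ? 4 : s3Prefix.split("/").length + 4;
        System.out.println(depth); // 6, matching the depth bump in the test YAML below
    }
}
```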
```diff
@@ -16,4 +16,4 @@ docdb-pipeline-transformed:
     sink:
       - opensearch:
           hosts: "host"
-          depth: "5"
+          depth: "6"
```
```diff
@@ -15,7 +15,7 @@ documentdb-pipeline-writer:
       - bucket:
           name: "bucket-name1"
           filter:
-            depth: "5"
+            depth: "6"
     scheduling:
       interval: "1s"
     acknowledgments: true
```
BsonHelper.java

```diff
@@ -87,7 +87,7 @@ public class BsonHelper {
     private static final String REGEX_PATTERN = "pattern";
     private static final String REGEX_OPTIONS = "options";
     public static final String DOCUMENTDB_ID_FIELD_NAME = "_id";
-    // public static final String DEFAULT_ID_MAPPING_FIELD_NAME = "doc_id";
+
     public static final JsonWriterSettings JSON_WRITER_SETTINGS = JsonWriterSettings.builder()
             .outputMode(JsonMode.RELAXED)
             .objectIdConverter((value, writer) -> writer.writeString(value.toHexString()))
```
CollectionConfig.java

```diff
@@ -2,6 +2,7 @@

 import com.fasterxml.jackson.annotation.JsonProperty;
 import jakarta.validation.constraints.NotNull;
+import jakarta.validation.constraints.Pattern;

 import java.util.Arrays;
 import java.util.stream.Collectors;
@@ -12,6 +13,7 @@ public class CollectionConfig {
     private static final int DEFAULT_PARTITION_COUNT = 100;
     private static final int DEFAULT_EXPORT_BATCH_SIZE = 10_000;
     @JsonProperty("collection")
+    @Pattern(regexp = ".+?\\..+", message = "Should be of pattern <database>.<collection>")
    private @NotNull String collection;

    @JsonProperty("export")
```
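The new `@Pattern` constraint rejects `collection` values that are not qualified as `<database>.<collection>`. A minimal sketch of how that constraint behaves under Jakarta Bean Validation (the `CollectionSetting` class and validator bootstrap are illustrative, not code from this commit, and a provider such as Hibernate Validator must be on the classpath):

```java
import jakarta.validation.Validation;
import jakarta.validation.Validator;
import jakarta.validation.constraints.NotNull;
import jakarta.validation.constraints.Pattern;

class CollectionSetting {
    // Same regexp and message as the commit's @Pattern annotation.
    @Pattern(regexp = ".+?\\..+", message = "Should be of pattern <database>.<collection>")
    @NotNull
    public String collection;

    CollectionSetting(String collection) {
        this.collection = collection;
    }
}

public class PatternCheck {
    public static void main(String[] args) {
        Validator validator = Validation.buildDefaultValidatorFactory().getValidator();
        // "db.orders" matches the pattern; bare "orders" produces one violation.
        System.out.println(validator.validate(new CollectionSetting("db.orders")).size()); // 0
        System.out.println(validator.validate(new CollectionSetting("orders")).size());    // 1
    }
}
```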
MongoDBSourceConfig.java

```diff
@@ -3,6 +3,7 @@
 import com.fasterxml.jackson.annotation.JsonProperty;
 import jakarta.validation.Valid;
 import jakarta.validation.constraints.NotNull;
+import jakarta.validation.constraints.Size;

 import java.time.Duration;
 import java.util.ArrayList;
@@ -29,6 +30,9 @@ public class MongoDBSourceConfig {
     @JsonProperty("read_preference")
     private String readPreference;
     @JsonProperty("collections")
+    @Valid
+    @NotNull
+    @Size(min = 1)
     private List<CollectionConfig> collections;
     @JsonProperty("acknowledgments")
     private Boolean acknowledgments = false;
@@ -60,6 +64,9 @@ public class MongoDBSourceConfig {
     @JsonProperty("disable_s3_read_for_leader")
     private boolean disableS3ReadForLeader = false;

+    @JsonProperty("id_key")
+    private String idKey;
+
     public MongoDBSourceConfig() {
         this.readPreference = DEFAULT_READ_PREFERENCE;
         this.collections = new ArrayList<>();
@@ -133,6 +140,10 @@ public boolean isDisableS3ReadForLeader() {
         return disableS3ReadForLeader;
     }

+    public String getIdKey() {
+        return this.idKey;
+    }
+
     public AwsConfig getAwsConfig() {
         return this.awsConfig;
     }
```
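Together with `@Valid @NotNull @Size(min = 1)`, a pipeline definition must now supply at least one collection, and each entry is validated recursively. The new `id_key` property is bound by Jackson; a hedged sketch of that binding (the `SourceSettings` stand-in and the `docdb_id` value are illustrative, not part of this commit):

```java
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;

class SourceSettings {
    // Mirrors MongoDBSourceConfig's new field.
    @JsonProperty("id_key")
    public String idKey;
}

public class IdKeyBinding {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
        SourceSettings settings = mapper.readValue("id_key: \"docdb_id\"\n", SourceSettings.class);
        System.out.println(settings.idKey); // docdb_id
    }
}
```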
```diff
@@ -15,6 +15,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import java.time.Instant;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.ExecutorService;
@@ -83,10 +84,11 @@ private String getS3PathPrefix() {
         }

         final String s3PathPrefix;
+        final Instant now = Instant.now();
         if (sourceCoordinator.getPartitionPrefix() != null ) {
-            s3PathPrefix = s3UserPathPrefix + sourceCoordinator.getPartitionPrefix() + S3_PATH_DELIMITER;
+            s3PathPrefix = s3UserPathPrefix + sourceCoordinator.getPartitionPrefix() + S3_PATH_DELIMITER + now.toEpochMilli() + S3_PATH_DELIMITER;
         } else {
-            s3PathPrefix = s3UserPathPrefix;
+            s3PathPrefix = s3UserPathPrefix + now.toEpochMilli() + S3_PATH_DELIMITER;
         }
         return s3PathPrefix;
     }
```
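Appending `Instant.now().toEpochMilli()` gives each export run its own S3 sub-prefix, which is also why `calculateDepth` above moved from `+ 3` to `+ 4`: the key gains one more `/`-delimited segment. A rough sketch of the resulting shape (both prefix values are hypothetical):

```java
import java.time.Instant;

public class PrefixShape {
    public static void main(String[] args) {
        final String s3UserPathPrefix = "my/prefix/";     // hypothetical user prefix
        final String partitionPrefix = "pipeline-run-1";  // hypothetical partition prefix
        final String s3PathPrefix =
                s3UserPathPrefix + partitionPrefix + "/" + Instant.now().toEpochMilli() + "/";
        // Prints something like: my/prefix/pipeline-run-1/1715040000000/
        System.out.println(s3PathPrefix);
    }
}
```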
```diff
@@ -178,12 +178,14 @@ public void run() {
                 recordBytes.add(bytes);
                 bytesReceivedSummary.record(bytes);
                 final Optional<BsonDocument> primaryKeyDoc = Optional.ofNullable(document.toBsonDocument());
-                final String primaryKeyBsonType = primaryKeyDoc.map(bsonDocument -> bsonDocument.getBsonType().name()).orElse(UNKNOWN_TYPE);
+                final String primaryKeyBsonType = primaryKeyDoc.map(bsonDocument -> bsonDocument.get(DOCUMENTDB_ID_FIELD_NAME).getBsonType().name()).orElse(UNKNOWN_TYPE);

                 // The version number is the export time minus some overlap to ensure new stream events still get priority
                 final long eventVersionNumber = (exportStartTime - VERSION_OVERLAP_TIME_FOR_EXPORT.toMillis()) * 1_000;
                 final Event event = recordConverter.convert(record, exportStartTime, eventVersionNumber, primaryKeyBsonType);
-                // event.put(DEFAULT_ID_MAPPING_FIELD_NAME, event.get(DOCUMENTDB_ID_FIELD_NAME, Object.class));
+                if (sourceConfig.getIdKey() !=null && !sourceConfig.getIdKey().isBlank()) {
+                    event.put(sourceConfig.getIdKey(), event.get(DOCUMENTDB_ID_FIELD_NAME, Object.class));
+                }
                 // delete _id
                 event.delete(DOCUMENTDB_ID_FIELD_NAME);
                 records.add(event);
@@ -207,7 +209,7 @@ public void run() {

                     successRecords += 1;
                 } catch (Exception e) {
-                    LOG.error("Failed to add record to buffer with error {}", e.getMessage());
+                    LOG.error("Failed to add record to buffer with error.", e);
                     failureItemsCounter.increment(records.size());
                     failedRecords += 1;
                 }
@@ -233,7 +235,7 @@ public void run() {
             }

         } catch (Exception e) {
-            LOG.error("Exception connecting to cluster and loading partition {}. Exception: {}", query, e.getMessage());
+            LOG.error("Exception connecting to cluster and loading partition {}.", query, e);
             throw new RuntimeException(e);
         } finally {
             // Do final checkpoint when reaching end of partition or due to exception
```
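Two details here: the original `_id` is preserved under the user-configured `id_key` before being deleted, and both `LOG.error` calls now pass the exception object itself, so SLF4J logs the full stack trace instead of only `e.getMessage()`. A condensed sketch of the remapping step, using a plain `Map` as a stand-in for Data Prepper's `Event` API:

```java
import java.util.HashMap;
import java.util.Map;

public class IdRemap {
    public static void main(String[] args) {
        final String idKey = "docdb_id"; // hypothetical id_key from the source config
        final Map<String, Object> event = new HashMap<>();
        event.put("_id", "6639f1d2e4b0c8a1b2c3d4e5");

        // Copy _id into the configured key, then drop _id, mirroring the commit's flow.
        if (idKey != null && !idKey.isBlank()) {
            event.put(idKey, event.get("_id"));
        }
        event.remove("_id");
        System.out.println(event); // {docdb_id=6639f1d2e4b0c8a1b2c3d4e5}
    }
}
```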
```diff
@@ -196,11 +196,13 @@ record = document.getFullDocument().toJson(JSON_WRITER_SETTINGS);

                 checkPointToken = document.getResumeToken().toJson(JSON_WRITER_SETTINGS);
                 final Optional<BsonDocument> primaryKeyDoc = Optional.ofNullable(document.getDocumentKey());
-                final String primaryKeyBsonType = primaryKeyDoc.map(bsonDocument -> bsonDocument.getBsonType().name()).orElse(UNKNOWN_TYPE);
+                final String primaryKeyBsonType = primaryKeyDoc.map(bsonDocument -> bsonDocument.get(DOCUMENTDB_ID_FIELD_NAME).getBsonType().name()).orElse(UNKNOWN_TYPE);
                 // TODO fix eventVersionNumber
                 final Event event = recordConverter.convert(record, eventCreationTimeMillis, eventCreationTimeMillis,
                         document.getOperationType(), primaryKeyBsonType);
-                // event.put(DEFAULT_ID_MAPPING_FIELD_NAME, event.get(DOCUMENTDB_ID_FIELD_NAME, Object.class));
+                if (sourceConfig.getIdKey() !=null && !sourceConfig.getIdKey().isBlank()) {
+                    event.put(sourceConfig.getIdKey(), event.get(DOCUMENTDB_ID_FIELD_NAME, Object.class));
+                }
                 // delete _id
                 event.delete(DOCUMENTDB_ID_FIELD_NAME);
                 records.add(event);
```
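The fix is the same in both the export and stream paths: the old code asked the wrapping `BsonDocument` for its type, which is always `DOCUMENT`, while the intent was the type of the `_id` entry. A small standalone probe with the MongoDB BSON classes:

```java
import org.bson.BsonDocument;
import org.bson.BsonObjectId;
import org.bson.types.ObjectId;

public class IdTypeProbe {
    public static void main(String[] args) {
        final BsonDocument keyDoc = new BsonDocument("_id", new BsonObjectId(new ObjectId()));

        // The document wrapper reports its own type...
        System.out.println(keyDoc.getBsonType());            // DOCUMENT
        // ...whereas the _id entry carries the primary-key type this commit records.
        System.out.println(keyDoc.get("_id").getBsonType()); // OBJECT_ID
    }
}
```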
```diff
@@ -8,10 +8,13 @@
 import io.micrometer.core.instrument.Counter;
 import io.micrometer.core.instrument.DistributionSummary;
 import org.bson.BsonDocument;
+import org.bson.BsonObjectId;
+import org.bson.BsonString;
 import org.bson.BsonType;
 import org.bson.Document;
 import org.bson.conversions.Bson;
 import org.bson.json.JsonWriterSettings;
+import org.bson.types.ObjectId;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.junit.jupiter.params.ParameterizedTest;
@@ -51,6 +54,7 @@
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
+import static org.opensearch.dataprepper.plugins.mongo.client.BsonHelper.DOCUMENTDB_ID_FIELD_NAME;
 import static org.opensearch.dataprepper.plugins.mongo.export.ExportPartitionWorker.EXPORT_RECORDS_TOTAL_COUNT;
 import static org.opensearch.dataprepper.plugins.mongo.export.ExportPartitionWorker.BYTES_RECEIVED;
 import static org.opensearch.dataprepper.plugins.mongo.export.ExportPartitionWorker.BYTES_PROCESSED;
@@ -132,20 +136,23 @@ public void testProcessPartitionSuccess(final String partitionKey) {
         Document doc1 = mock(Document.class);
         Document doc2 = mock(Document.class);
         BsonDocument bsonDoc1 = mock(BsonDocument.class);
         BsonDocument bsonDoc2 = mock(BsonDocument.class);
+        when(bsonDoc1.get("_id")).thenReturn(new BsonObjectId(new ObjectId()));
+        when(bsonDoc2.get("_id")).thenReturn(new BsonString(UUID.randomUUID().toString()));
         when(doc1.toBsonDocument()).thenReturn(bsonDoc1);
         when(doc2.toBsonDocument()).thenReturn(bsonDoc2);
-        when(bsonDoc1.getBsonType()).thenReturn(BsonType.OBJECT_ID);
-        when(bsonDoc2.getBsonType()).thenReturn(BsonType.STRING);
         final String docJson1 = UUID.randomUUID().toString();
         final String docJson2 = UUID.randomUUID() + docJson1;
         when(doc1.toJson(any(JsonWriterSettings.class))).thenReturn(docJson1);
         when(doc2.toJson(any(JsonWriterSettings.class))).thenReturn(docJson2);
         lenient().when(cursor.next())
                 .thenReturn(doc1)
                 .thenReturn(doc2);
+        when(mockSourceConfig.getIdKey()).thenReturn("docdb_id");
         final long eventVersionNumber = (exportStartTime - VERSION_OVERLAP_TIME_FOR_EXPORT.toMillis()) * 1_000;
         Event event1 = mock((Event.class));
         Event event2 = mock((Event.class));
+        when(event1.get("_id", Object.class)).thenReturn(UUID.randomUUID().toString());
+        when(event2.get("_id", Object.class)).thenReturn(UUID.randomUUID().toString());
         when(mockRecordConverter.convert(docJson1, exportStartTime, eventVersionNumber, BsonType.OBJECT_ID.name())).thenReturn(event1);
         when(mockRecordConverter.convert(docJson2, exportStartTime, eventVersionNumber, BsonType.STRING.name())).thenReturn(event2);
         lenient().when(dataQueryPartition.getPartitionKey()).thenReturn(partitionKey);
@@ -183,6 +190,8 @@ public void testProcessPartitionSuccess(final String partitionKey) {
         verify(mongoDatabase).getCollection(eq("collection"));
         verify(mockRecordConverter).initializePartitions(partitions);
         verify(mockRecordBufferWriter).writeToBuffer(eq(mockAcknowledgementSet), any());
+        verify(event1).put(mockSourceConfig.getIdKey(), event1.get(DOCUMENTDB_ID_FIELD_NAME, Object.class));
+        verify(event2).put(mockSourceConfig.getIdKey(), event2.get(DOCUMENTDB_ID_FIELD_NAME, Object.class));
         verify(exportRecordTotalCounter, times(2)).increment();
         verify(successItemsCounter).increment(2.0);
         verify(bytesReceivedSummary).record(docJson1.getBytes().length);
```
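Note the design choice in the updated test: instead of stubbing `getBsonType()` on the document mock, it stubs `get("_id")` with real `BsonObjectId`/`BsonString` values, so the production chain `get("_id").getBsonType()` resolves without further stubbing. A minimal standalone Mockito illustration of the same idea:

```java
import org.bson.BsonDocument;
import org.bson.BsonObjectId;
import org.bson.types.ObjectId;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class MockIdType {
    public static void main(String[] args) {
        final BsonDocument doc = mock(BsonDocument.class);
        // Stub get("_id") with a real BsonValue; its getBsonType() needs no stubbing.
        when(doc.get("_id")).thenReturn(new BsonObjectId(new ObjectId()));
        System.out.println(doc.get("_id").getBsonType()); // OBJECT_ID
    }
}
```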