Skip to content

Commit

Permalink
feat: add processing time on gcs partition
Browse files Browse the repository at this point in the history
  • Loading branch information
lavkesh committed Oct 11, 2023
1 parent de5d3bb commit 8b52fa6
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ public interface BlobSinkConfig extends AppConfig {
@Key("SINK_BLOB_FILE_PARTITION_PROTO_TIMESTAMP_FIELD_NAME")
String getFilePartitionProtoTimestampFieldName();

@Key("SINK_BLOB_FILE_PARTITION_PROCESSING_TIME_ENABLED")
@DefaultValue("false")
boolean getFilePartitionProcessingTimeEnabled();

@Key("SINK_BLOB_FILE_PARTITION_TIME_GRANULARITY_TYPE")
@DefaultValue("day")
@ConverterClass(BlobSinkFilePartitionTypeConverter.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@

import com.google.protobuf.Descriptors;
import com.google.protobuf.DynamicMessage;
import com.gotocompany.firehose.config.BlobSinkConfig;
import com.gotocompany.firehose.sink.blob.proto.KafkaMetadataProtoMessage;
import lombok.AllArgsConstructor;
import lombok.Data;

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;

@AllArgsConstructor
@Data
Expand Down Expand Up @@ -34,4 +37,14 @@ public Instant getTimestamp(String fieldName) {
int nanos = (int) timestamp.getField(timestamp.getDescriptorForType().findFieldByName("nanos"));
return Instant.ofEpochSecond(seconds, nanos);
}

public LocalDateTime getLocalDateTime(BlobSinkConfig config) {
if (config.getFilePartitionProcessingTimeEnabled()) {
return LocalDateTime.now();
} else {
return LocalDateTime.ofInstant(
getTimestamp(config.getFilePartitionProtoTimestampFieldName()),
ZoneId.of(config.getFilePartitionProtoTimestampTimezone()));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,7 @@

import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;

/**
Expand All @@ -25,22 +21,21 @@ public class TimePartitionedPathUtils {

public static Path getTimePartitionedPath(Record record, BlobSinkConfig sinkConfig) {
String topic = record.getTopic(sinkConfig.getOutputKafkaMetadataColumnName());
Instant timestamp = record.getTimestamp(sinkConfig.getFilePartitionProtoTimestampFieldName());
Path path = Paths.get(topic);
if (sinkConfig.getFilePartitionTimeGranularityType() == Constants.FilePartitionType.NONE) {
return Paths.get(topic);
return path;
}
LocalDate localDate = LocalDateTime.ofInstant(timestamp, ZoneId.of(sinkConfig.getFilePartitionProtoTimestampTimezone())).toLocalDate();
String datePart = DATE_FORMATTER.format(localDate);
LocalTime localTime = LocalDateTime.ofInstant(timestamp, ZoneId.of(sinkConfig.getFilePartitionProtoTimestampTimezone())).toLocalTime();
String hourPart = HOUR_FORMATTER.format(localTime);
LocalDateTime dateTime = record.getLocalDateTime(sinkConfig);
String datePart = DATE_FORMATTER.format(dateTime.toLocalDate());
String hourPart = HOUR_FORMATTER.format(dateTime.toLocalTime());

String dateSegment = String.format("%s%s", sinkConfig.getFilePartitionTimeDatePrefix(), datePart);
String hourSegment = String.format("%s%s", sinkConfig.getFilePartitionTimeHourPrefix(), hourPart);

String dateTimePartition;
switch (sinkConfig.getFilePartitionTimeGranularityType()) {
case NONE:
return Paths.get(topic);
return path;
case DAY:
dateTimePartition = String.format("%s", dateSegment);
break;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
package com.gotocompany.firehose.sink.blob.message;

import com.google.protobuf.DynamicMessage;
import com.gotocompany.firehose.config.BlobSinkConfig;
import com.gotocompany.firehose.sink.blob.TestUtils;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;

public class RecordTest {

Expand Down Expand Up @@ -39,4 +43,33 @@ public void shouldGetTimeStampFromMessage() {
Record record = new Record(message, metadata);
Assert.assertEquals(defaultTimestamp, record.getTimestamp("created_time"));
}

@Test
public void shouldGetDateTimeLocally() throws InterruptedException {
BlobSinkConfig config = Mockito.mock(BlobSinkConfig.class);
Mockito.when(config.getFilePartitionProcessingTimeEnabled()).thenReturn(true);
DynamicMessage message = TestUtils.createMessage(defaultTimestamp, defaultOrderNumber);
DynamicMessage metadata = TestUtils.createMetadata("nested_field", defaultTimestamp, defaultOffset, defaultPartition, defaultTopic);
Record record = new Record(message, metadata);
LocalDateTime before = LocalDateTime.now();
Thread.sleep(1000);
LocalDateTime localDateTime = record.getLocalDateTime(config);
Thread.sleep(1000);
LocalDateTime after = LocalDateTime.now();
Assert.assertTrue(localDateTime.isAfter(before));
Assert.assertTrue(localDateTime.isBefore(after));
}

@Test
public void shouldGetDateTimeFromMessage() throws InterruptedException {
BlobSinkConfig config = Mockito.mock(BlobSinkConfig.class);
Mockito.when(config.getFilePartitionProcessingTimeEnabled()).thenReturn(false);
Mockito.when(config.getFilePartitionProtoTimestampFieldName()).thenReturn("created_time");
Mockito.when(config.getFilePartitionProtoTimestampTimezone()).thenReturn("UTC");
DynamicMessage message = TestUtils.createMessage(defaultTimestamp, defaultOrderNumber);
DynamicMessage metadata = TestUtils.createMetadata("nested_field", defaultTimestamp, defaultOffset, defaultPartition, defaultTopic);
Record record = new Record(message, metadata);
LocalDateTime localDateTime = record.getLocalDateTime(config);
Assert.assertEquals(LocalDateTime.ofInstant(defaultTimestamp, ZoneId.of("UTC")), localDateTime);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

import java.io.IOException;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.HashSet;
import java.util.Set;

Expand Down Expand Up @@ -58,6 +60,7 @@ public void setUp() {
MockitoAnnotations.initMocks(this);
this.sinkConfig = Mockito.mock(BlobSinkConfig.class);
Mockito.when(sinkConfig.getFilePartitionProtoTimestampTimezone()).thenReturn(zone);
Mockito.when(sinkConfig.getFilePartitionProcessingTimeEnabled()).thenReturn(false);
Mockito.when(sinkConfig.getOutputKafkaMetadataColumnName()).thenReturn("");
Mockito.when(sinkConfig.getFilePartitionProtoTimestampFieldName()).thenReturn(timeStampFieldName);
Mockito.when(sinkConfig.getFilePartitionTimeGranularityType()).thenReturn(Constants.FilePartitionType.HOUR);
Expand All @@ -68,6 +71,7 @@ public void setUp() {
@Test
public void shouldCreateLocalFileWriter() throws Exception {
Record record = Mockito.mock(Record.class);
Mockito.when(record.getLocalDateTime(sinkConfig)).thenReturn(LocalDateTime.now());
Mockito.when(record.getTimestamp(timeStampFieldName)).thenReturn(Instant.ofEpochMilli(1L));
Mockito.when(record.getTopic("")).thenReturn(defaultTopic);
Mockito.when(localFileWriter1.getFullPath()).thenReturn("/tmp/test");
Expand All @@ -82,13 +86,15 @@ public void shouldCreateLocalFileWriter() throws Exception {
@Test
public void shouldCreateMultipleWriterBasedOnPartition() throws Exception {
Record record1 = Mockito.mock(Record.class);
Mockito.when(record1.getLocalDateTime(sinkConfig)).thenReturn(LocalDateTime.ofInstant(Instant.ofEpochMilli(3600000L), ZoneId.of(zone)));
Mockito.when(record1.getTimestamp(timeStampFieldName)).thenReturn(Instant.ofEpochMilli(3600000L));
Mockito.when(record1.getTopic("")).thenReturn(defaultTopic);
Mockito.when(localStorage.createLocalFileWriter(TimePartitionedPathUtils.getTimePartitionedPath(record1, sinkConfig))).thenReturn(localFileWriter1);
Mockito.when(localFileWriter1.write(record1)).thenReturn(true);
Mockito.when(localFileWriter1.getFullPath()).thenReturn("/tmp/test1");

Record record2 = Mockito.mock(Record.class);
Mockito.when(record2.getLocalDateTime(sinkConfig)).thenReturn(LocalDateTime.ofInstant(Instant.ofEpochMilli(7200000L), ZoneId.of(zone)));
Mockito.when(record2.getTimestamp(timeStampFieldName)).thenReturn(Instant.ofEpochMilli(7200000L));
Mockito.when(record2.getTopic("")).thenReturn(defaultTopic);
Mockito.when(localStorage.createLocalFileWriter(TimePartitionedPathUtils.getTimePartitionedPath(record2, sinkConfig))).thenReturn(localFileWriter2);
Expand All @@ -106,6 +112,7 @@ public void shouldCreateMultipleWriterBasedOnPartition() throws Exception {
@Test(expected = IOException.class)
public void shouldThrowIOExceptionWhenWriteThrowsException() throws Exception {
Record record = Mockito.mock(Record.class);
Mockito.when(record.getLocalDateTime(sinkConfig)).thenReturn(LocalDateTime.now());
Mockito.when(record.getTimestamp(timeStampFieldName)).thenReturn(Instant.ofEpochMilli(3600000L));
Mockito.when(record.getTopic("")).thenReturn(defaultTopic);
Mockito.when(localFileWriter1.getMetadata()).thenReturn(new LocalFileMetadata("/tmp/", "/tmp/test1", 0, 0, 0));
Expand All @@ -120,6 +127,7 @@ public void shouldThrowIOExceptionWhenWriteThrowsException() throws Exception {
public void shouldThrowIOExceptionWhenOpenNewWriterFailed() throws Exception {
expectedException.expect(LocalFileWriterFailedException.class);
Record record = Mockito.mock(Record.class);
Mockito.when(record.getLocalDateTime(sinkConfig)).thenReturn(LocalDateTime.now());
Mockito.when(record.getTimestamp(timeStampFieldName)).thenReturn(Instant.ofEpochMilli(3600000L));
Mockito.when(record.getTopic("")).thenReturn(defaultTopic);
Mockito.when(localFileWriter1.getMetadata()).thenReturn(new LocalFileMetadata("/tmp/", "/tmp/test1", 0, 0, 0));
Expand All @@ -133,6 +141,7 @@ public void shouldThrowIOExceptionWhenOpenNewWriterFailed() throws Exception {
public void shouldGetEmptyFlushedPath() throws Exception {
Record record = Mockito.mock(Record.class);
Mockito.when(record.getTimestamp(timeStampFieldName)).thenReturn(Instant.ofEpochMilli(1L));
Mockito.when(record.getLocalDateTime(sinkConfig)).thenReturn(LocalDateTime.ofInstant(Instant.ofEpochMilli(1L), ZoneId.of(zone)));
Mockito.when(record.getTopic("")).thenReturn(defaultTopic);
Mockito.when(localFileWriter1.getFullPath()).thenReturn("/tmp/test");
Mockito.when(localStorage.createLocalFileWriter(TimePartitionedPathUtils.getTimePartitionedPath(record, sinkConfig))).thenReturn(localFileWriter1);
Expand Down

0 comments on commit 8b52fa6

Please sign in to comment.