Added faster ingestion for Iceberg java code
shubham19may committed Mar 7, 2025
1 parent eeb7794 commit b9fa0b0
Showing 2 changed files with 29 additions and 24 deletions.
5 changes: 5 additions & 0 deletions writers/iceberg/debezium-server-iceberg-sink/pom.xml
@@ -311,6 +311,11 @@
         <version>5.0.0</version>
         <!-- <scope>test</scope>-->
     </dependency>
+    <dependency>
+        <groupId>com.fasterxml.jackson.core</groupId>
+        <artifactId>jackson-databind</artifactId>
+        <version>2.15.2</version> <!-- or the latest version -->
+    </dependency>
     <dependency>
         <groupId>org.apache.spark</groupId>
         <artifactId>spark-core_${version.spark.scala}</artifactId>
48 changes: 24 additions & 24 deletions OlakeRowsIngester.java
@@ -9,25 +9,26 @@
 import org.apache.iceberg.Table;
 import org.apache.iceberg.catalog.Catalog;
 import org.apache.iceberg.catalog.TableIdentifier;
-import org.codehaus.jackson.map.ObjectMapper;
-import org.codehaus.jackson.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.core.type.TypeReference;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.nio.charset.StandardCharsets;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 
 
 // This class is used to receive rows from the Olake Golang project and dump it into iceberg using prebuilt code here.
 @Dependent
 public class OlakeRowsIngester extends StringArrayServiceGrpc.StringArrayServiceImplBase {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(OlakeRowsIngester.class);
 
     private String icebergNamespace = "public";
     Catalog icebergCatalog;
 
     private final IcebergTableOperator icebergTableOperator;
+    // Create a single reusable ObjectMapper instance
+    private static final ObjectMapper objectMapper = new ObjectMapper();
 
     public OlakeRowsIngester() {
         icebergTableOperator = new IcebergTableOperator();
@@ -45,56 +45,55 @@ public void setIcebergCatalog(Catalog icebergCatalog) {
         this.icebergCatalog = icebergCatalog;
     }
 
 
     @Override
     public void sendStringArray(Messaging.StringArrayRequest request, StreamObserver<Messaging.StringArrayResponse> responseObserver) {
+        String requestId = String.format("[Thread-%d-%d]", Thread.currentThread().getId(), System.nanoTime());
+        long startTime = System.currentTimeMillis();
         // Retrieve the array of strings from the request
         List<String> messages = request.getMessagesList();
 
+        long parsingStartTime = System.currentTimeMillis();
         Map<String, List<RecordConverter>> result =
-                messages.stream()
+                messages.parallelStream() // Use parallel stream for concurrent processing
                         .map(message -> {
                             try {
-                                ObjectMapper objectMapper = new ObjectMapper();
-
-                                // Read the entire JSON message into a Map<String, Object>:
-                                Map<String, Object> messageMap =
-                                        objectMapper.readValue(message, new TypeReference<Map<String, Object>>() {
-                                        });
+                                Map<String, Object> messageMap = objectMapper.readValue(message, new TypeReference<Map<String, Object>>() {});
 
                                 // Get the destination table:
                                 String destinationTable = (String) messageMap.get("destination_table");
 
-                                // Convert the "key" part back to a JSON string:
-                                String keyString = objectMapper.writeValueAsString(messageMap.get("key"));
+                                // Get key and value objects directly without re-serializing
+                                Object key = messageMap.get("key");
+                                Object value = messageMap.get("value");
 
-                                // Convert the "value" part back to a JSON string:
-                                String valueString = objectMapper.writeValueAsString(messageMap.get("value"));
+                                // Convert to bytes only once
+                                byte[] keyBytes = key != null ? objectMapper.writeValueAsBytes(key) : null;
+                                byte[] valueBytes = objectMapper.writeValueAsBytes(value);
 
-                                // Now keyString and valueString contain the JSON for their respective fields.
-                                return new RecordConverter(destinationTable,
-                                        valueString.getBytes(StandardCharsets.UTF_8),
-                                        keyString.getBytes(StandardCharsets.UTF_8));
-                                // TODO: implement key being null
-                                // return new RecordConverter(destinationTable, value.getBytes(), key == null ? null : key.getBytes());
+                                return new RecordConverter(destinationTable, valueBytes, keyBytes);
                             } catch (Exception e) {
                                 throw new RuntimeException("Error parsing message as JSON", e);
                             }
                         })
                         .collect(Collectors.groupingBy(RecordConverter::destination));
+        LOGGER.info("{} Parsing messages took: {} ms", requestId, (System.currentTimeMillis() - parsingStartTime));
 
         // consume list of events for each destination table
+        long processingStartTime = System.currentTimeMillis();
         for (Map.Entry<String, List<RecordConverter>> tableEvents : result.entrySet()) {
             Table icebergTable = this.loadIcebergTable(TableIdentifier.of(icebergNamespace, tableEvents.getKey()), tableEvents.getValue().get(0));
             icebergTableOperator.addToTable(icebergTable, tableEvents.getValue());
         }
+        LOGGER.info("{} Processing tables took: {} ms", requestId, (System.currentTimeMillis() - processingStartTime));
 
         // Build and send a response
         Messaging.StringArrayResponse response = Messaging.StringArrayResponse.newBuilder()
-                .setResult("Received " + messages.size() + " messages")
+                .setResult(requestId + " Received " + messages.size() + " messages")
                 .build();
         responseObserver.onNext(response);
         responseObserver.onCompleted();
+        LOGGER.info("{} Total time taken: {} ms", requestId, (System.currentTimeMillis() - startTime));
     }
 
     public Table loadIcebergTable(TableIdentifier tableId, RecordConverter sampleEvent) {
