Skip to content

Commit

Permalink
add spark support for iceberg
Browse files Browse the repository at this point in the history
  • Loading branch information
nianliuu committed Jan 17, 2025
1 parent 9708050 commit d145b8a
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 15 deletions.
2 changes: 1 addition & 1 deletion plugin-mapping.properties
Original file line number Diff line number Diff line change
Expand Up @@ -156,5 +156,5 @@ seatunnel.source.Qdrant = connector-qdrant
seatunnel.sink.Qdrant = connector-qdrant
seatunnel.source.TencentVectorDB = connector-tencent-vectordb
setunnel.source.AstraDB = connector-astradb
seatunnel.sink.Shopify = connector-shopify
seatunnel.source.Shopify = connector-shopify

4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
<revision>2.3.8-SNAPSHOT</revision>
<seatunnel.config.shade.version>2.1.1</seatunnel.config.shade.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>17</java.version>
<java.version>11</java.version>
<scala.version>2.12.15</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
Expand Down Expand Up @@ -102,7 +102,7 @@
<elasticsearch6.client.version>6.3.1</elasticsearch6.client.version>
<elasticsearch7.client.version>7.5.1</elasticsearch7.client.version>
<flink-shaded-hadoop-2.version>2.7.5-7.0</flink-shaded-hadoop-2.version>
<commons-lang3.version>3.5</commons-lang3.version>
<commons-lang3.version>3.17.0</commons-lang3.version>
<commons-io.version>2.11.0</commons-io.version>
<commons-collections4.version>4.4</commons-collections4.version>
<commons-csv.version>1.10.0</commons-csv.version>
Expand Down
16 changes: 14 additions & 2 deletions seatunnel-connectors-v2/connector-iceberg/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,21 @@

<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-spark-runtime-3.5_2.12</artifactId>
<artifactId>iceberg-spark-runtime-3.3_2.12</artifactId>
<version>1.5.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>3.5.4</version>
<version>3.3.0</version>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_2.12</artifactId>
<version>3.3.0</version>
<scope>test</scope>
</dependency>

<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/s3 -->
Expand Down Expand Up @@ -285,6 +292,11 @@
<!--suppress UnresolvedMavenProperty -->
<shadedPattern>${seatunnel.shade.package}.${connector.name}.shaded.parquet</shadedPattern>
</relocation>
<relocation>
<pattern>org.apache.spark</pattern>
<!--suppress UnresolvedMavenProperty -->
<shadedPattern>iceberg.org.apache.spark</shadedPattern>
</relocation>
</relocations>
</configuration>
</execution>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ public class MilvusSinkWriter
private final DescribeCollectionResp describeCollectionResp;
private final Boolean hasPartitionKey;

private final static AtomicLong writeCount = new AtomicLong();
private final static AtomicLong writeCache = new AtomicLong();
private final static AtomicLong writeCount = new AtomicLong(0);
private final static AtomicLong writeCache = new AtomicLong(0);

public MilvusSinkWriter(
Context context,
Expand Down
6 changes: 6 additions & 0 deletions seatunnel-dist/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,12 @@
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.seatunnel</groupId>
<artifactId>connector-iceberg</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.apache.seatunnel</groupId>-->
<!-- <artifactId>connector-assert</artifactId>-->
Expand Down
28 changes: 22 additions & 6 deletions seatunnel-examples/seatunnel-spark-connector-v2-example/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

<properties>
<spark.scope>compile</spark.scope>
<spark.2.4.0.jackson.version>2.6.7</spark.2.4.0.jackson.version>
<spark.2.4.0.jackson.version>2.13.5</spark.2.4.0.jackson.version>
</properties>

<dependencyManagement>
Expand All @@ -53,7 +53,7 @@

<dependency>
<groupId>org.apache.seatunnel</groupId>
<artifactId>seatunnel-spark-2-starter</artifactId>
<artifactId>seatunnel-spark-3-starter</artifactId>
<version>${project.version}</version>
</dependency>

Expand Down Expand Up @@ -82,14 +82,14 @@
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
<version>${spark.2.4.0.version}</version>
<version>${spark.3.3.0.version}</version>
<scope>${spark.scope}</scope>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.2.4.0.version}</version>
<version>${spark.3.3.0.version}</version>
<scope>${spark.scope}</scope>
<exclusions>
<exclusion>
Expand All @@ -107,14 +107,14 @@
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.2.4.0.version}</version>
<version>${spark.3.3.0.version}</version>
<scope>${spark.scope}</scope>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark.2.4.0.version}</version>
<version>${spark.3.3.0.version}</version>
<scope>${spark.scope}</scope>
<exclusions>
<exclusion>
Expand All @@ -134,6 +134,22 @@
<artifactId>connector-milvus</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.seatunnel</groupId>
<artifactId>connector-iceberg</artifactId>
<version>${project.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.github.luben/zstd-jni -->
<dependency>
<groupId>com.github.luben</groupId>
<artifactId>zstd-jni</artifactId>
<version>1.5.6-9</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-catalyst -->

<!-- Thanks for using https://jar-download.com -->


<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
<groupId>org.apache.hadoop</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@
package org.apache.seatunnel.example.spark.v2;

import org.apache.seatunnel.core.starter.exception.CommandException;

import java.io.FileNotFoundException;
import java.net.URISyntaxException;

public class SeaTunnelApiExample {

public static void main(String[] args)
throws FileNotFoundException, URISyntaxException, CommandException {
String configurePath = args.length > 0 ? args[0] : "/examples/milvus.conf";
String configurePath = args.length > 0 ? args[0] : "/examples/iceberg.conf";
ExampleUtils.builder(configurePath);
}
}

0 comments on commit d145b8a

Please sign in to comment.