From 9f5d9a6317367b28a4257bfc3654e469064275e2 Mon Sep 17 00:00:00 2001
From: fariel
Date: Wed, 10 Mar 2021 13:34:02 +0800
Subject: [PATCH 01/50] add doriswriter

---
 doriswriter/doc/doriswriter.md                     | 170 ++++++++++++++++++
 doriswriter/pom.xml                                | 155 ++++++++++++++++
 doriswriter/src/main/assembly/package.xml          |  35 ++++
 .../writer/doriswriter/DorisWriter.java            | 144 +++++++++++++++
 .../doriswriter/DorisWriterOptions.java            | 111 ++++++++++++
 .../manager/DorisStreamLoadVisitor.java            | 143 +++++++++++++++
 .../manager/DorisWriterManager.java                | 113 ++++++++++++
 .../doriswriter/util/DorisWriterUtil.java          |  83 +++++++++
 doriswriter/src/main/resources/plugin.json         |   6 +
 .../main/resources/plugin_job_template.json        |  14 ++
 package.xml                                        |   7 +
 pom.xml                                            |   3 +-
 12 files changed, 983 insertions(+), 1 deletion(-)
 create mode 100644 doriswriter/doc/doriswriter.md
 create mode 100755 doriswriter/pom.xml
 create mode 100755 doriswriter/src/main/assembly/package.xml
 create mode 100755 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java
 create mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java
 create mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java
 create mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java
 create mode 100755 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/util/DorisWriterUtil.java
 create mode 100755 doriswriter/src/main/resources/plugin.json
 create mode 100644 doriswriter/src/main/resources/plugin_job_template.json

diff --git a/doriswriter/doc/doriswriter.md b/doriswriter/doc/doriswriter.md
new file mode 100644
index 0000000000..715afa8cbe
--- /dev/null
+++ b/doriswriter/doc/doriswriter.md
@@ -0,0 +1,170 @@
# DataX DorisWriter

---

## 1 Quick Introduction

The DorisWriter plugin writes data into a destination table of a Doris database. Under the hood, DorisWriter imports the data into Doris in CSV format via Stream Load.

## 2 Implementation

DorisWriter imports data into Doris in CSV format via Stream Load. Internally, it buffers the records fetched by the `reader` and ships them to Doris in batches, which improves write performance.

## 3 Features

### 3.1 Sample Configuration

* The job below reads data from MySQL and loads it into Doris.

```json
{
    "job": {
        "setting": {
            "speed": {
                "channel": 1
            },
            "errorLimit": {
                "record": 0,
                "percentage": 0
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "xxxx",
                        "password": "xxxx",
                        "column": [ "k1", "k2", "v1", "v2" ],
                        "connection": [
                            {
                                "table": [ "table1", "table2" ],
                                "jdbcUrl": [
                                    "jdbc:mysql://127.0.0.1:3306/datax_test1"
                                ]
                            },
                            {
                                "table": [ "table3", "table4" ],
                                "jdbcUrl": [
                                    "jdbc:mysql://127.0.0.1:3306/datax_test2"
                                ]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "doriswriter",
                    "parameter": {
                        "username": "xxxx",
                        "password": "xxxx",
                        "database": "xxxx",
                        "table": "xxxx",
                        "column": ["k1", "k2", "v1", "v2"],
                        "preSql": [],
                        "postSql": [],
                        "jdbcUrl": "jdbc:mysql://172.28.17.100:9030/",
                        "loadUrl": ["172.28.17.100:8030", "172.28.17.100:8030"]
                    }
                }
            }
        ]
    }
}
```

### 3.2 Parameter Description

* **username**

  * Description: username for the Doris database

  * Required: yes

  * Default: none

* **password**

  * Description: password for the Doris database

  * Required: yes

  * Default: none

* **database**

  * Description: name of the database that holds the destination Doris table.

  * Required: yes

  * Default: none

* **table**

  * Description: name of the destination Doris table.

  * Required: yes

  * Default: none

* **loadUrl**

  * Description: Doris FE address(es) used for Stream Load, in the form `fe_ip:fe_http_port`; multiple FE addresses may be given.

  * Required: yes

  * Default: none

* **column**

  * Description: the fields of the destination table that data is written into, separated by commas, e.g. "column": ["id","name","age"].

    **The `column` option is mandatory and must not be left empty!**

    Note: configuring this with a wildcard (e.g. `["*"]`) instead of listing every field explicitly is strongly discouraged, because the job may run incorrectly or fail once the field count or field types of the destination table change.

  * Required: yes

  * Default: none

* **preSql**

  * Description: standard SQL statements executed before data is written to the destination table.

  * Required: no

  * Default: none

* **postSql**

  * Description: standard SQL statements executed after data has been written to the destination table.

  * Required: no

  * Default: none

* **jdbcUrl**

  * Description: JDBC connection string of the destination database, used to execute `preSql` and `postSql`.

  * Required: no

  * Default: none
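
As a quick reference, a minimal `writer` block containing only the required options might look like the sketch below (addresses, credentials, and the database/table names are placeholders):

```json
"writer": {
    "name": "doriswriter",
    "parameter": {
        "username": "xxxx",
        "password": "xxxx",
        "database": "example_db",
        "table": "example_table",
        "column": ["k1", "k2", "v1", "v2"],
        "loadUrl": ["fe_ip:8030"]
    }
}
```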
+ + +### 3.3 类型转换 + +传入的数据均会被转为字符串,并以`\t`作为列分隔符,`\n`作为行分隔符,组成`csv`文件进行StreamLoad导入操作。 + +## 4 性能报告 + + +## 5 约束限制 + + +## FAQ diff --git a/doriswriter/pom.xml b/doriswriter/pom.xml new file mode 100755 index 0000000000..2bbf1d222f --- /dev/null +++ b/doriswriter/pom.xml @@ -0,0 +1,155 @@ + + 4.0.0 + + com.alibaba.datax + datax-all + 0.0.1-SNAPSHOT + + doriswriter + doriswriter + jar + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + + com.alibaba.datax + plugin-rdbms-util + ${datax-project-version} + + + commons-codec + commons-codec + 1.9 + + + commons-logging + commons-logging + 1.1.1 + + + org.apache.httpcomponents + httpcore + 4.4.6 + + + org.apache.httpcomponents + httpclient + 4.5.3 + + + com.alibaba + fastjson + 1.2.75 + + + mysql + mysql-connector-java + 5.1.34 + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + org.apache.maven.plugins + maven-shade-plugin + 3.0.0 + + + + package + + shade + + + true + + + com.alibaba.fastjson + com.dorisdb.shade.com.alibaba.fastjson + + + org.apache.http + com.dorisdb.shade.org.apache.http + + + org.apache.commons + com.dorisdb.shade.org.apache.commons + + + + + commons-codec:commons-codec + commons-logging:* + org.apache.httpcomponents:httpclient + org.apache.httpcomponents:httpcore + com.alibaba:fastjson + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/doriswriter/src/main/assembly/package.xml b/doriswriter/src/main/assembly/package.xml new file mode 100755 index 0000000000..2e0880e1e4 --- /dev/null +++ b/doriswriter/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/doriswriter + + + target/ + + doriswriter-0.0.1-SNAPSHOT.jar + + plugin/writer/doriswriter + + + + + + false + plugin/writer/doriswriter/libs + runtime + + + diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java new file mode 100755 index 0000000000..0cbe579c01 --- /dev/null +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java @@ -0,0 +1,144 @@ +package com.dorisdb.connector.datax.plugin.writer.doriswriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.dorisdb.connector.datax.plugin.writer.doriswriter.manager.DorisWriterManager; +import com.dorisdb.connector.datax.plugin.writer.doriswriter.util.DorisWriterUtil; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.util.ArrayList; +import java.util.List; + +public class DorisWriter extends Writer { + + public static class Job extends Writer.Job { + + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + private 
Configuration originalConfig = null;
        private DorisWriterOptions options;

        @Override
        public void init() {
            this.originalConfig = super.getPluginJobConf();
            options = new DorisWriterOptions(super.getPluginJobConf());
            options.doPretreatment();
        }

        @Override
        public void preCheck(){
            this.init();
            DorisWriterUtil.preCheckPrePareSQL(options);
            DorisWriterUtil.preCheckPostSQL(options);
        }

        @Override
        public void prepare() {
            String username = options.getUsername();
            String password = options.getPassword();
            String jdbcUrl = options.getJdbcUrl();
            List renderedPreSqls = DorisWriterUtil.renderPreOrPostSqls(options.getPreSqlList(), options.getTable());
            if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) {
                Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password);
                LOG.info("Begin to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPreSqls), jdbcUrl);
                DorisWriterUtil.executeSqls(conn, renderedPreSqls);
                DBUtil.closeDBResources(null, null, conn);
            }
        }

        @Override
        public List split(int mandatoryNumber) {
            List configurations = new ArrayList<>(mandatoryNumber);
            for (int i = 0; i < mandatoryNumber; i++) {
                configurations.add(originalConfig);
            }
            return configurations;
        }

        @Override
        public void post() {
            String username = options.getUsername();
            String password = options.getPassword();
            String jdbcUrl = options.getJdbcUrl();
            List renderedPostSqls = DorisWriterUtil.renderPreOrPostSqls(options.getPostSqlList(), options.getTable());
            if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) {
                Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password);
                LOG.info("Begin to execute postSqls:[{}]. context info:{}.", String.join(";", renderedPostSqls), jdbcUrl);
                DorisWriterUtil.executeSqls(conn, renderedPostSqls);
                DBUtil.closeDBResources(null, null, conn);
            }
        }

        @Override
        public void destroy() {
        }

    }

    public static class Task extends Writer.Task {
        private DorisWriterManager writerManager;
        private DorisWriterOptions options;

        @Override
        public void init() {
            options = new DorisWriterOptions(super.getPluginJobConf());
            writerManager = new DorisWriterManager(options);
        }

        @Override
        public void prepare() {
        }

        public void startWrite(RecordReceiver recordReceiver) {
            try {
                Record record;
                while ((record = recordReceiver.getFromReader()) != null) {
                    if (record.getColumnNumber() != options.getColumns().size()) {
                        throw DataXException
                            .asDataXException(
                                DBUtilErrorCode.CONF_ERROR,
                                String.format(
                                    "列配置信息有错误. 因为您配置的任务中,源头读取字段数:%s 与 目的表要写入的字段数:%s 不相等. 
请检查您的配置并作出修改.", + record.getColumnNumber(), + options.getColumns().size())); + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < record.getColumnNumber(); i++) { + sb.append(record.getColumn(i).getRawData().toString()); + if (i < record.getColumnNumber() - 1) { + sb.append("\t"); + } + } + writerManager.writeRecord(sb.toString()); + } + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + } + + @Override + public void post() { + try { + writerManager.flush(writerManager.createBatchLabel()); + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + } + + @Override + public void destroy() {} + + @Override + public boolean supportFailOver(){ + return false; + } + } +} diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java new file mode 100644 index 0000000000..1d9cb2be41 --- /dev/null +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java @@ -0,0 +1,111 @@ +package com.dorisdb.connector.datax.plugin.writer.doriswriter; + +import java.io.Serializable; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; + +import java.util.List; + +public class DorisWriterOptions implements Serializable { + + private static final long serialVersionUID = 1l; + private static final long KILO_BYTES_SCALE = 1024l; + private static final long MEGA_BYTES_SCALE = KILO_BYTES_SCALE * KILO_BYTES_SCALE; + private static final int MAX_RETRIES = 1; + private static final int BATCH_ROWS = 500000; + private static final long BATCH_BYTES = 100 * MEGA_BYTES_SCALE; + + private static final String KEY_USERNAME = "username"; + private static final String KEY_PASSWORD = "password"; + private static final String KEY_DATABASE = "database"; + private static final String KEY_TABLE = "table"; + private static final String KEY_COLUMN = "column"; + private static final String KEY_PRE_SQL = "preSql"; + private static final String KEY_POST_SQL = "postSql"; + private static final String KEY_JDBC_URL = "jdbcUrl"; + private static final String KEY_LOAD_URL = "loadUrl"; + + private final Configuration options; + + public DorisWriterOptions(Configuration options) { + this.options = options; + } + + public void doPretreatment() { + validateRequired(); + validateStreamLoadUrl(); + } + + public String getJdbcUrl() { + return options.getString(KEY_JDBC_URL); + } + + public String getDatabase() { + return options.getString(KEY_DATABASE); + } + + public String getTable() { + return options.getString(KEY_TABLE); + } + + public String getUsername() { + return options.getString(KEY_USERNAME); + } + + public String getPassword() { + return options.getString(KEY_PASSWORD); + } + + public List getLoadUrlList() { + return options.getList(KEY_LOAD_URL, String.class); + } + + public List getColumns() { + return options.getList(KEY_COLUMN, String.class); + } + + public List getPreSqlList() { + return options.getList(KEY_PRE_SQL, String.class); + } + + public List getPostSqlList() { + return options.getList(KEY_POST_SQL, String.class); + } + + public int getMaxRetries() { + return MAX_RETRIES; + } + + public int getBatchRows() { + return BATCH_ROWS; + } + + public long getBatchSize() { + return 
BATCH_BYTES;
    }

    private void validateStreamLoadUrl() {
        List urlList = getLoadUrlList();
        for (String host : urlList) {
            if (host.split(":").length < 2) {
                throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR,
                    "loadUrl的格式不正确,请输入 `fe_ip:fe_http_port;fe_ip:fe_http_port`。");
            }
        }
    }

    private void validateRequired() {
        final String[] requiredOptionKeys = new String[]{
                KEY_USERNAME,
                KEY_PASSWORD,
                KEY_DATABASE,
                KEY_TABLE,
                KEY_LOAD_URL
        };
        for (String optionKey : requiredOptionKeys) {
            options.getNecessaryValue(optionKey, DBUtilErrorCode.REQUIRED_VALUE);
        }
    }
}
diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java
new file mode 100644
index 0000000000..a01b906ba4
--- /dev/null
+++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java
@@ -0,0 +1,143 @@
package com.dorisdb.connector.datax.plugin.writer.doriswriter.manager;

import java.io.IOException;
import java.io.Serializable;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import com.alibaba.fastjson.JSON;
import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions;

import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.Map;


public class DorisStreamLoadVisitor implements Serializable {

    private static final long serialVersionUID = 1L;

    private static final Logger LOG = LoggerFactory.getLogger(DorisStreamLoadVisitor.class);

    private final DorisWriterOptions writerOptions;
    private int pos;

    public DorisStreamLoadVisitor(DorisWriterOptions writerOptions) {
        this.writerOptions = writerOptions;
    }

    public void doStreamLoad(String label, List labeledRows) throws IOException {
        String host = getAvailableHost();
        if (null == host) {
            throw new IOException("None of the host in `load_url` could be connected.");
        }
        String loadUrl = new StringBuilder(host)
            .append("/api/")
            .append(writerOptions.getDatabase())
            .append("/")
            .append(writerOptions.getTable())
            .append("/_stream_load")
            .toString();
        Map loadResult = doHttpPut(loadUrl, label, joinRows(labeledRows));
        final String keyStatus = "Status";
        if (null == loadResult || !loadResult.containsKey(keyStatus)) {
            throw new IOException("Unable to flush data to doris: unknown result status.");
        }
        if (loadResult.get(keyStatus).equals("Fail")) {
            throw new IOException(
                new StringBuilder("Failed to flush data to doris.").append(loadResult.get("Message").toString()).toString()
            );
        }
    }

    private String getAvailableHost() {
        List hostList = writerOptions.getLoadUrlList();
        if (pos >= hostList.size()) {
            pos = 0;
        }
        for (; pos < hostList.size(); pos++) {
            String host = new 
StringBuilder("http://").append(hostList.get(pos)).toString(); + if (tryHttpConnection(host)) { + return host; + } + } + return null; + } + + private boolean tryHttpConnection(String host) { + try { + URL url = new URL(host); + HttpURLConnection co = (HttpURLConnection) url.openConnection(); + co.setConnectTimeout(1000); + co.connect(); + co.disconnect(); + return true; + } catch (Exception e1) { + LOG.warn("Failed to connect to address:{}", host, e1); + return false; + } + } + + private byte[] joinRows(List rows) { + return String.join("\n", rows).getBytes(StandardCharsets.UTF_8); + } + + @SuppressWarnings("unchecked") + private Map doHttpPut(String loadUrl, String label, byte[] data) throws IOException { + LOG.info(String.format("Executing stream load to: '%s', size: '%s'", loadUrl, data.length)); + final HttpClientBuilder httpClientBuilder = HttpClients.custom() + .setRedirectStrategy(new DefaultRedirectStrategy() { + @Override + protected boolean isRedirectable(String method) { + return true; + } + }); + try (CloseableHttpClient httpclient = httpClientBuilder.build()) { + HttpPut httpPut = new HttpPut(loadUrl); + List cols = writerOptions.getColumns(); + if (null != cols && !cols.isEmpty()) { + httpPut.setHeader("columns", String.join(",", cols)); + } + httpPut.setHeader("Expect", "100-continue"); + httpPut.setHeader("label", label); + httpPut.setHeader("Content-Type", "application/x-www-form-urlencoded"); + httpPut.setHeader("Authorization", getBasicAuthHeader(writerOptions.getUsername(), writerOptions.getPassword())); + httpPut.setEntity(new ByteArrayEntity(data)); + httpPut.setConfig(RequestConfig.custom().setRedirectsEnabled(true).build()); + try (CloseableHttpResponse resp = httpclient.execute(httpPut)) { + int code = resp.getStatusLine().getStatusCode(); + if (200 != code) { + LOG.warn("Request failed with code:{}", code); + return null; + } + HttpEntity respEntity = resp.getEntity(); + if (null == respEntity) { + LOG.warn("Request failed with empty response."); + return null; + } + return (Map)JSON.parse(EntityUtils.toString(respEntity)); + } + } + } + + private String getBasicAuthHeader(String username, String password) { + String auth = username + ":" + password; + byte[] encodedAuth = Base64.encodeBase64(auth.getBytes()); + return new StringBuilder("Basic ").append(new String(encodedAuth)).toString(); + } + +} diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java new file mode 100644 index 0000000000..86d370a572 --- /dev/null +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java @@ -0,0 +1,113 @@ +package com.dorisdb.connector.datax.plugin.writer.doriswriter.manager; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; + +import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions; + +public class DorisWriterManager implements Serializable { + + private static final long serialVersionUID = 1L; + + private static final Logger LOG = LoggerFactory.getLogger(DorisWriterManager.class); + + private final DorisStreamLoadVisitor dorisStreamLoadVisitor; + private final DorisWriterOptions writerOptions; + + private final List buffer = new ArrayList<>(); + private int batchCount 
= 0; + private long batchSize = 0; + private volatile boolean closed = false; + private volatile Exception flushException; + + public DorisWriterManager(DorisWriterOptions writerOptions) { + this.writerOptions = writerOptions; + this.dorisStreamLoadVisitor = new DorisStreamLoadVisitor(writerOptions); + } + + public final synchronized void writeRecord(String record) throws IOException { + checkFlushException(); + try { + buffer.add(record); + batchCount++; + batchSize += record.length(); + if (batchCount >= writerOptions.getBatchRows() || batchSize >= writerOptions.getBatchSize()) { + flush(createBatchLabel()); + } + } catch (Exception e) { + throw new IOException("Writing records to Doris failed.", e); + } + } + + public synchronized void flush(String label) throws IOException { + checkFlushException(); + if (batchCount == 0) { + return; + } + for (int i = 0; i <= writerOptions.getMaxRetries(); i++) { + try { + tryToFlush(label); + buffer.clear(); + batchCount = 0; + batchSize = 0; + break; + } catch (IOException e) { + LOG.warn("Failed to flush batch data to doris, retry times = {}", i, e); + if (i >= writerOptions.getMaxRetries()) { + throw new IOException(e); + } + try { + Thread.sleep(1000l * (i + 1)); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IOException("Unable to flush, interrupted while doing another attempt", e); + } + } + } + } + + public synchronized void close() { + if (!closed) { + closed = true; + + if (batchCount > 0) { + try { + flush(createBatchLabel()); + } catch (Exception e) { + throw new RuntimeException("Writing records to Doris failed.", e); + } + } + } + checkFlushException(); + } + + public String createBatchLabel() { + return UUID.randomUUID().toString(); + } + + public List getBufferedBatchList() { + return buffer; + } + + public void setBufferedBatchList(List buffer) { + this.buffer.clear(); + this.buffer.addAll(buffer); + } + + private void tryToFlush(String label) throws IOException { + // flush to Doris with stream load + dorisStreamLoadVisitor.doStreamLoad(label, buffer); + } + + private void checkFlushException() { + if (flushException != null) { + throw new RuntimeException("Writing records to Doris failed.", flushException); + } + } +} diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/util/DorisWriterUtil.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/util/DorisWriterUtil.java new file mode 100755 index 0000000000..348e519c55 --- /dev/null +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/util/DorisWriterUtil.java @@ -0,0 +1,83 @@ +package com.dorisdb.connector.datax.plugin.writer.doriswriter.util; + +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.RdbmsException; +import com.alibaba.datax.plugin.rdbms.writer.Constant; +import com.alibaba.druid.sql.parser.ParserException; +import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions; +import com.google.common.base.Strings; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.Statement; +import java.util.*; + +public final class DorisWriterUtil { + private static final Logger LOG = LoggerFactory.getLogger(DorisWriterUtil.class); + + private DorisWriterUtil() {} + + public static List renderPreOrPostSqls(List preOrPostSqls, String tableName) { + if (null == 
preOrPostSqls) { + return Collections.emptyList(); + } + List renderedSqls = new ArrayList<>(); + for (String sql : preOrPostSqls) { + if (!Strings.isNullOrEmpty(sql)) { + renderedSqls.add(sql.replace(Constant.TABLE_NAME_PLACEHOLDER, tableName)); + } + } + return renderedSqls; + } + + public static void executeSqls(Connection conn, List sqls) { + Statement stmt = null; + String currentSql = null; + try { + stmt = conn.createStatement(); + for (String sql : sqls) { + currentSql = sql; + DBUtil.executeSqlWithoutResultSet(stmt, sql); + } + } catch (Exception e) { + throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null); + } finally { + DBUtil.closeDBResources(null, stmt, null); + } + } + + public static void preCheckPrePareSQL(DorisWriterOptions options) { + String table = options.getTable(); + List preSqls = options.getPreSqlList(); + List renderedPreSqls = DorisWriterUtil.renderPreOrPostSqls(preSqls, table); + if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { + LOG.info("Begin to preCheck preSqls:[{}].", String.join(";", renderedPreSqls)); + for (String sql : renderedPreSqls) { + try { + DBUtil.sqlValid(sql, DataBaseType.MySql); + } catch (ParserException e) { + throw RdbmsException.asPreSQLParserException(DataBaseType.MySql,e,sql); + } + } + } + } + + public static void preCheckPostSQL(DorisWriterOptions options) { + String table = options.getTable(); + List postSqls = options.getPostSqlList(); + List renderedPostSqls = DorisWriterUtil.renderPreOrPostSqls(postSqls, table); + if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { + LOG.info("Begin to preCheck postSqls:[{}].", String.join(";", renderedPostSqls)); + for(String sql : renderedPostSqls) { + try { + DBUtil.sqlValid(sql, DataBaseType.MySql); + } catch (ParserException e){ + throw RdbmsException.asPostSQLParserException(DataBaseType.MySql,e,sql); + } + } + } + } +} diff --git a/doriswriter/src/main/resources/plugin.json b/doriswriter/src/main/resources/plugin.json new file mode 100755 index 0000000000..081ddace9d --- /dev/null +++ b/doriswriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "doriswriter", + "class": "com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriter", + "description": "useScene: prod. mechanism: DorisStreamLoad. 
warn: The more you know about the database, the fewer problems you encounter.",
    "developer": "dorisdb"
}
\ No newline at end of file
diff --git a/doriswriter/src/main/resources/plugin_job_template.json b/doriswriter/src/main/resources/plugin_job_template.json
new file mode 100644
index 0000000000..ee1744c8f2
--- /dev/null
+++ b/doriswriter/src/main/resources/plugin_job_template.json
@@ -0,0 +1,14 @@
{
    "name": "doriswriter",
    "parameter": {
        "username": "",
        "password": "",
        "database": "",
        "table": "",
        "column": [],
        "preSql": [],
        "postSql": [],
        "jdbcUrl": "",
        "loadUrl": []
    }
}
\ No newline at end of file
diff --git a/package.xml b/package.xml
index 49e3c4ecb3..347ee81a5c 100755
--- a/package.xml
+++ b/package.xml
@@ -189,6 +189,13 @@
                 datax
             
         
+        
+            doriswriter/target/datax/
+            
+                **/*.*
+            
+            datax
+        
         
             drdswriter/target/datax/
diff --git a/pom.xml b/pom.xml
index 8f3b827ed9..96ad433310 100644
--- a/pom.xml
+++ b/pom.xml
@@ -71,6 +71,7 @@
         mysqlwriter
+        doriswriter
         drdswriter
         odpswriter
         txtfilewriter
@@ -97,7 +98,7 @@
         gdbwriter
         cassandrawriter
         clickhousewriter
-        oscarwriter
+        
         plugin-rdbms-util
         plugin-unstructured-storage-util

From adfb2581d3d04b29de6c88ab329804b6aee021e0 Mon Sep 17 00:00:00 2001
From: fariel
Date: Fri, 12 Mar 2021 18:53:07 +0800
Subject: [PATCH 02/50] fix null data conversion

---
 .../connector/datax/plugin/writer/doriswriter/DorisWriter.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java
index 0cbe579c01..05cef7a475 100755
--- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java
+++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java
@@ -112,7 +112,8 @@ public void startWrite(RecordReceiver recordReceiver) {
                     }
                     StringBuilder sb = new StringBuilder();
                     for (int i = 0; i < record.getColumnNumber(); i++) {
-                        sb.append(record.getColumn(i).getRawData().toString());
+                        Object value = record.getColumn(i).getRawData();
+                        sb.append(null == value ? 
"\\N" : value); if (i < record.getColumnNumber() - 1) { sb.append("\t"); } From 51b42804bebe5bdba2eb390b238394b18ff08799 Mon Sep 17 00:00:00 2001 From: fariel Date: Sat, 13 Mar 2021 17:08:46 +0800 Subject: [PATCH 03/50] add debug log --- .../doriswriter/manager/DorisStreamLoadVisitor.java | 8 +++----- .../writer/doriswriter/manager/DorisWriterManager.java | 5 +---- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java index a01b906ba4..90e4fdbb5d 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java @@ -1,7 +1,6 @@ package com.dorisdb.connector.datax.plugin.writer.doriswriter.manager; import java.io.IOException; -import java.io.Serializable; import java.net.HttpURLConnection; import java.net.URL; import java.nio.charset.StandardCharsets; @@ -27,9 +26,7 @@ import java.util.Map; -public class DorisStreamLoadVisitor implements Serializable { - - private static final long serialVersionUID = 1L; +public class DorisStreamLoadVisitor { private static final Logger LOG = LoggerFactory.getLogger(DorisStreamLoadVisitor.class); @@ -57,9 +54,10 @@ public void doStreamLoad(String label, List labeledRows) throws IOExcept if (null == loadResult || !loadResult.containsKey(keyStatus)) { throw new IOException("Unable to flush data to doris: unknown result status."); } + LOG.debug(new StringBuilder("StreamLoad response:\n").append(JSON.toJSONString(loadResult)).toString()); if (loadResult.get(keyStatus).equals("Fail")) { throw new IOException( - new StringBuilder("Failed to flush data to doris.").append(loadResult.get("Message").toString()).toString() + new StringBuilder("Failed to flush data to doris.\n").append(JSON.toJSONString(loadResult)).toString() ); } } diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java index 86d370a572..a5338ca9da 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java @@ -4,16 +4,13 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.io.Serializable; import java.util.ArrayList; import java.util.List; import java.util.UUID; import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions; -public class DorisWriterManager implements Serializable { - - private static final long serialVersionUID = 1L; +public class DorisWriterManager { private static final Logger LOG = LoggerFactory.getLogger(DorisWriterManager.class); From 8ba8e4973b51ac393f5d7dd1813c13d091832cb7 Mon Sep 17 00:00:00 2001 From: fariel Date: Mon, 15 Mar 2021 14:33:04 +0800 Subject: [PATCH 04/50] add support: `loadProps` --- .../writer/doriswriter/DorisWriter.java | 14 ++-- .../doriswriter/DorisWriterOptions.java | 24 ++++++ .../manager/DorisStreamLoadVisitor.java | 13 ++- .../doriswriter/row/DorisCsvSerializer.java | 79 +++++++++++++++++++ .../doriswriter/row/DorisISerializer.java | 
11 +++ .../doriswriter/row/DorisJsonSerializer.java | 33 ++++++++ .../row/DorisSerializerFactory.java | 22 ++++++ 7 files changed, 186 insertions(+), 10 deletions(-) create mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java create mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisISerializer.java create mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java create mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisSerializerFactory.java diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java index 05cef7a475..d88581ec8f 100755 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java @@ -9,6 +9,8 @@ import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; import com.alibaba.datax.plugin.rdbms.util.DataBaseType; import com.dorisdb.connector.datax.plugin.writer.doriswriter.manager.DorisWriterManager; +import com.dorisdb.connector.datax.plugin.writer.doriswriter.row.DorisISerializer; +import com.dorisdb.connector.datax.plugin.writer.doriswriter.row.DorisSerializerFactory; import com.dorisdb.connector.datax.plugin.writer.doriswriter.util.DorisWriterUtil; import org.slf4j.Logger; @@ -86,11 +88,13 @@ public void destroy() { public static class Task extends Writer.Task { private DorisWriterManager writerManager; private DorisWriterOptions options; + private DorisISerializer rowSerializer; @Override public void init() { options = new DorisWriterOptions(super.getPluginJobConf()); writerManager = new DorisWriterManager(options); + rowSerializer = DorisSerializerFactory.createSerializer(options); } @Override @@ -110,15 +114,7 @@ public void startWrite(RecordReceiver recordReceiver) { record.getColumnNumber(), options.getColumns().size())); } - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < record.getColumnNumber(); i++) { - Object value = record.getColumn(i).getRawData(); - sb.append(null == value ? 
"\\N" : value); - if (i < record.getColumnNumber() - 1) { - sb.append("\t"); - } - } - writerManager.writeRecord(sb.toString()); + writerManager.writeRecord(rowSerializer.serialize(record)); } } catch (Exception e) { throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java index 1d9cb2be41..b4dbb0a369 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java @@ -7,6 +7,7 @@ import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; import java.util.List; +import java.util.Map; public class DorisWriterOptions implements Serializable { @@ -17,6 +18,11 @@ public class DorisWriterOptions implements Serializable { private static final int BATCH_ROWS = 500000; private static final long BATCH_BYTES = 100 * MEGA_BYTES_SCALE; + private static final String KEY_LOAD_PROPS_FORMAT = "format"; + public enum StreamLoadFormat { + CSV, JSON; + } + private static final String KEY_USERNAME = "username"; private static final String KEY_PASSWORD = "password"; private static final String KEY_DATABASE = "database"; @@ -26,6 +32,7 @@ public class DorisWriterOptions implements Serializable { private static final String KEY_POST_SQL = "postSql"; private static final String KEY_JDBC_URL = "jdbcUrl"; private static final String KEY_LOAD_URL = "loadUrl"; + private static final String KEY_LOAD_PROPS = "loadProps"; private final Configuration options; @@ -74,6 +81,10 @@ public List getPostSqlList() { return options.getList(KEY_POST_SQL, String.class); } + public Map getLoadProps() { + return options.getMap(KEY_LOAD_PROPS); + } + public int getMaxRetries() { return MAX_RETRIES; } @@ -86,6 +97,18 @@ public long getBatchSize() { return BATCH_BYTES; } + public StreamLoadFormat getStreamLoadFormat() { + Map loadProps = getLoadProps(); + if (null == loadProps) { + return StreamLoadFormat.CSV; + } + if (loadProps.containsKey(KEY_LOAD_PROPS_FORMAT) + && StreamLoadFormat.JSON.name().equalsIgnoreCase(String.valueOf(loadProps.get(KEY_LOAD_PROPS_FORMAT)))) { + return StreamLoadFormat.JSON; + } + return StreamLoadFormat.CSV; + } + private void validateStreamLoadUrl() { List urlList = getLoadUrlList(); for (String host : urlList) { @@ -102,6 +125,7 @@ private void validateRequired() { KEY_PASSWORD, KEY_DATABASE, KEY_TABLE, + KEY_COLUMN, KEY_LOAD_URL }; for (String optionKey : requiredOptionKeys) { diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java index 90e4fdbb5d..68621d8f8b 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java @@ -91,7 +91,13 @@ private boolean tryHttpConnection(String host) { } private byte[] joinRows(List rows) { - return String.join("\n", rows).getBytes(StandardCharsets.UTF_8); + if (DorisWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { + return String.join("\n", 
rows).getBytes(StandardCharsets.UTF_8); + } + if (DorisWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { + return new StringBuilder("[").append(String.join(",", rows)).append("]").toString().getBytes(StandardCharsets.UTF_8); + } + throw new RuntimeException("Failed to join rows data, unsupported `format` from stream load properties:"); } @SuppressWarnings("unchecked") @@ -110,6 +116,11 @@ protected boolean isRedirectable(String method) { if (null != cols && !cols.isEmpty()) { httpPut.setHeader("columns", String.join(",", cols)); } + if (null != writerOptions.getLoadProps()) { + for (Map.Entry entry : writerOptions.getLoadProps().entrySet()) { + httpPut.setHeader(entry.getKey(), String.valueOf(entry.getValue())); + } + } httpPut.setHeader("Expect", "100-continue"); httpPut.setHeader("label", label); httpPut.setHeader("Content-Type", "application/x-www-form-urlencoded"); diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java new file mode 100644 index 0000000000..7a22b2121d --- /dev/null +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java @@ -0,0 +1,79 @@ +package com.dorisdb.connector.datax.plugin.writer.doriswriter.row; + +import java.io.StringWriter; + +import com.alibaba.datax.common.element.Record; + +import com.google.common.base.Strings; + +public class DorisCsvSerializer implements DorisISerializer { + + private static final long serialVersionUID = 1L; + + private final String HEX_STRING = "0123456789ABCDEF"; + + private final String columnSeparator; + + public DorisCsvSerializer(String sp) { + this.columnSeparator = parseByteSeparator(sp); + } + + @Override + public String serialize(Record row) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < row.getColumnNumber(); i++) { + Object value = row.getColumn(i).getRawData(); + sb.append(null == value ? 
"\\N" : value); + if (i < row.getColumnNumber() - 1) { + sb.append(columnSeparator); + } + } + return sb.toString(); + } + + private String parseByteSeparator(String sp) { + if (Strings.isNullOrEmpty(sp)) { + // `\t` by default + return "\t"; + } + if (!sp.toUpperCase().startsWith("\\X")) { + return sp; + } + String hexStr = sp.substring(2); + // check hex str + if (hexStr.isEmpty()) { + throw new RuntimeException("Failed to parse column_separator: `Hex str is empty`"); + } + if (hexStr.length() % 2 != 0) { + throw new RuntimeException("Failed to parse column_separator: `Hex str length error`"); + } + for (char hexChar : hexStr.toUpperCase().toCharArray()) { + if (HEX_STRING.indexOf(hexChar) == -1) { + throw new RuntimeException("Failed to parse column_separator: `Hex str format error`"); + } + } + // transform to separator + StringWriter writer = new StringWriter(); + for (byte b : hexStrToBytes(hexStr)) { + writer.append((char) b); + } + return writer.toString(); + } + + private byte[] hexStrToBytes(String hexStr) { + String upperHexStr = hexStr.toUpperCase(); + int length = upperHexStr.length() / 2; + char[] hexChars = upperHexStr.toCharArray(); + byte[] bytes = new byte[length]; + for (int i = 0; i < length; i++) { + int pos = i * 2; + bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1])); + } + return bytes; + } + + private byte charToByte(char c) { + return (byte) HEX_STRING.indexOf(c); + } + +} diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisISerializer.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisISerializer.java new file mode 100644 index 0000000000..92a732885f --- /dev/null +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisISerializer.java @@ -0,0 +1,11 @@ +package com.dorisdb.connector.datax.plugin.writer.doriswriter.row; + +import java.io.Serializable; + +import com.alibaba.datax.common.element.Record; + +public interface DorisISerializer extends Serializable { + + String serialize(Record row); + +} diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java new file mode 100644 index 0000000000..5da3e9aeae --- /dev/null +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java @@ -0,0 +1,33 @@ +package com.dorisdb.connector.datax.plugin.writer.doriswriter.row; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.fastjson.JSON; + +public class DorisJsonSerializer implements DorisISerializer { + + private static final long serialVersionUID = 1L; + + private final List fieldNames; + + public DorisJsonSerializer(List fieldNames) { + this.fieldNames = fieldNames; + } + + @Override + public String serialize(Record row) { + if (null == fieldNames) { + return ""; + } + Map rowMap = new HashMap<>(fieldNames.size()); + int idx = 0; + for (String fieldName : fieldNames) { + rowMap.put(fieldName, row.getColumn(idx++).getRawData()); + } + return JSON.toJSONString(rowMap); + } + +} diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisSerializerFactory.java 
b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisSerializerFactory.java
new file mode 100644
index 0000000000..0816399e01
--- /dev/null
+++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisSerializerFactory.java
@@ -0,0 +1,22 @@
package com.dorisdb.connector.datax.plugin.writer.doriswriter.row;

import java.util.Map;

import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions;

public class DorisSerializerFactory {

    private DorisSerializerFactory() {}

    public static DorisISerializer createSerializer(DorisWriterOptions writerOptions) {
        if (DorisWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) {
            Map props = writerOptions.getLoadProps();
            return new DorisCsvSerializer(null == props || !props.containsKey("column_separator") ? null : String.valueOf(props.get("column_separator")));
        }
        if (DorisWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) {
            return new DorisJsonSerializer(writerOptions.getColumns());
        }
        throw new RuntimeException("Failed to create row serializer, unsupported `format` from stream load properties.");
    }

}

From 70a480f5624e1daee425b8763ebedccc476ea093 Mon Sep 17 00:00:00 2001
From: fariel
Date: Thu, 18 Mar 2021 16:18:04 +0800
Subject: [PATCH 05/50] modify doc && fix length calculating

---
 doriswriter/doc/doriswriter.md                     | 11 ++++++++++-
 .../doriswriter/manager/DorisWriterManager.java    |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/doriswriter/doc/doriswriter.md b/doriswriter/doc/doriswriter.md
index 715afa8cbe..13600b5b10 100644
--- a/doriswriter/doc/doriswriter.md
+++ b/doriswriter/doc/doriswriter.md
@@ -67,7 +67,8 @@ The DorisWriter plugin writes data into a destination table of a Doris database.
                         "preSql": [],
                         "postSql": [],
                         "jdbcUrl": "jdbc:mysql://172.28.17.100:9030/",
-                        "loadUrl": ["172.28.17.100:8030", "172.28.17.100:8030"]
+                        "loadUrl": ["172.28.17.100:8030", "172.28.17.100:8030"],
+                        "loadProps": {}
                     }
                 }
             }
@@ -156,6 +157,14 @@ The DorisWriter plugin writes data into a destination table of a Doris database.
 
   * Default: none
+
+* **loadProps**
+
+  * Description: request properties passed through to Stream Load; see the Stream Load documentation for details.
+
+  * Required: no
+
+  * Default: none
+
 ### 3.3 类型转换
 
diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java
index 86d370a572..da24448f13 100644
--- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java
+++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java
@@ -33,7 +33,7 @@ public final synchronized void writeRecord(String record) throws IOException {
        try {
            buffer.add(record);
            batchCount++;
-            batchSize += record.length();
+            batchSize += record.getBytes().length;
            if (batchCount >= writerOptions.getBatchRows() || batchSize >= writerOptions.getBatchSize()) {
                flush(createBatchLabel());
            }

From 5cd07ab08432647f4081cf18de9f6a2c91c68d32 Mon Sep 17 00:00:00 2001
From: fariel
Date: Mon, 22 Mar 2021 20:03:38 +0800
Subject: [PATCH 06/50] fix date datetime timestamp conversion

---
 doriswriter/pom.xml                                |  6 +++
 .../doriswriter/row/DorisBaseSerializer.java       | 52 +++++++++++++++
 .../doriswriter/row/DorisCsvSerializer.java        |  4 +-
 .../doriswriter/row/DorisJsonSerializer.java       |  5 +-
 4 files changed, 63 insertions(+), 4 deletions(-)
 create mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java

diff --git a/doriswriter/pom.xml b/doriswriter/pom.xml
index 2bbf1d222f..1524e3586a 100755
--- a/doriswriter/pom.xml
+++ b/doriswriter/pom.xml
@@ -41,6 +41,11 @@
             commons-codec
             1.9
         
+        
+            org.apache.commons
+            commons-lang3
+            3.12.0
+        
         
             commons-logging
             commons-logging
@@ -108,6 +113,7 @@
 
             
+                        org.apache.commons:commons-lang3
                         commons-codec:commons-codec
                         commons-logging:*
                         org.apache.httpcomponents:httpclient
diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java
new file mode 100644
index 0000000000..a46fc4421f
--- /dev/null
+++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java
@@ -0,0 +1,52 @@
package com.dorisdb.connector.datax.plugin.writer.doriswriter.row;

import java.util.TimeZone;

import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.DateColumn;
import com.alibaba.datax.common.element.Column.Type;
import com.alibaba.datax.common.util.Configuration;

import org.apache.commons.lang3.time.DateFormatUtils;

public class DorisBaseSerializer {

    static String datetimeFormat = "yyyy-MM-dd HH:mm:ss";

    static String dateFormat = "yyyy-MM-dd";

    static String timeFormat = "HH:mm:ss";

    static String timeZone = "GMT+8";

    static TimeZone timeZoner = TimeZone.getTimeZone(DorisBaseSerializer.timeZone);

    static void init(final Configuration configuration) {
        DorisBaseSerializer.datetimeFormat = configuration.getString("common.column.datetimeFormat", datetimeFormat);
        DorisBaseSerializer.timeFormat = configuration.getString("common.column.timeFormat", timeFormat);
        DorisBaseSerializer.dateFormat = configuration.getString("common.column.dateFormat", dateFormat);
        DorisBaseSerializer.timeZone = configuration.getString("common.column.timeZone", DorisBaseSerializer.timeZone);
        DorisBaseSerializer.timeZoner = TimeZone.getTimeZone(DorisBaseSerializer.timeZone);
    }

    protected String fieldConvertion(Column col) {
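        // Null raw values are returned as null here (the CSV serializer later
        // renders them as \N). Non-DATE columns fall back to Column.asString(),
        // while DATE columns are formatted below according to their sub-type
        // (date, time, or datetime) using the configured patterns.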
if (null == col.getRawData()) { + return null; + } + if (Type.DATE != col.getType()) { + return col.asString(); + } + DateColumn.DateType type = ((DateColumn)col).getSubType(); + if (type == DateColumn.DateType.DATE) { + return DateFormatUtils.format(col.asDate(), DorisBaseSerializer.dateFormat, DorisBaseSerializer.timeZoner); + } + if (type == DateColumn.DateType.TIME) { + return DateFormatUtils.format(col.asDate(), DorisBaseSerializer.timeFormat, DorisBaseSerializer.timeZoner); + } + if (type == DateColumn.DateType.DATETIME) { + return DateFormatUtils.format(col.asDate(), DorisBaseSerializer.datetimeFormat, DorisBaseSerializer.timeZoner); + } + return null; + } + +} diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java index 7a22b2121d..866c8d87ce 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java @@ -6,7 +6,7 @@ import com.google.common.base.Strings; -public class DorisCsvSerializer implements DorisISerializer { +public class DorisCsvSerializer extends DorisBaseSerializer implements DorisISerializer { private static final long serialVersionUID = 1L; @@ -22,7 +22,7 @@ public DorisCsvSerializer(String sp) { public String serialize(Record row) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < row.getColumnNumber(); i++) { - Object value = row.getColumn(i).getRawData(); + String value = fieldConvertion(row.getColumn(i)); sb.append(null == value ? "\\N" : value); if (i < row.getColumnNumber() - 1) { sb.append(columnSeparator); diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java index 5da3e9aeae..3f74ac587b 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java @@ -7,7 +7,7 @@ import com.alibaba.datax.common.element.Record; import com.alibaba.fastjson.JSON; -public class DorisJsonSerializer implements DorisISerializer { +public class DorisJsonSerializer extends DorisBaseSerializer implements DorisISerializer { private static final long serialVersionUID = 1L; @@ -25,7 +25,8 @@ public String serialize(Record row) { Map rowMap = new HashMap<>(fieldNames.size()); int idx = 0; for (String fieldName : fieldNames) { - rowMap.put(fieldName, row.getColumn(idx++).getRawData()); + rowMap.put(fieldName, fieldConvertion(row.getColumn(idx))); + idx++; } return JSON.toJSONString(rowMap); } From dee644c9603a43d01982c8f7fb401e99852150f8 Mon Sep 17 00:00:00 2001 From: fariel Date: Wed, 24 Mar 2021 15:17:39 +0800 Subject: [PATCH 07/50] modify doc --- doriswriter/doc/doriswriter.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/doriswriter/doc/doriswriter.md b/doriswriter/doc/doriswriter.md index 13600b5b10..c7aef09370 100644 --- a/doriswriter/doc/doriswriter.md +++ b/doriswriter/doc/doriswriter.md @@ -168,7 +168,15 @@ DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。 ### 3.3 类型转换 -传入的数据均会被转为字符串,并以`\t`作为列分隔符,`\n`作为行分隔符,组成`csv`文件进行StreamLoad导入操作。 
+By default, all incoming data is converted to strings, with `\t` as the column separator and `\n` as the row separator, forming a `csv` payload for the Stream Load import.
+
+To switch the import format to `json`, simply configure `loadProps` accordingly:
+```json
+"loadProps": {
+    "format": "json",
+    "strip_outer_array": true
+}
+```
 
 ## 4 性能报告
 

From ee9243cfe28cf36ac00e611cec824f4239474a9e Mon Sep 17 00:00:00 2001
From: fariel
Date: Thu, 25 Mar 2021 15:49:41 +0800
Subject: [PATCH 08/50] async flush version

---
 .../writer/doriswriter/DorisWriter.java            |   2 +-
 .../doriswriter/DorisWriterOptions.java            |   6 +
 .../doriswriter/manager/DorisFlushTuple.java       |  20 ++++
 .../manager/DorisStreamLoadVisitor.java            |   5 +-
 .../manager/DorisWriterManager.java                | 108 ++++++++++++------
 5 files changed, 101 insertions(+), 40 deletions(-)
 create mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisFlushTuple.java

diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java
index d88581ec8f..8b3c414d92 100755
--- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java
+++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java
@@ -124,7 +124,7 @@ public void startWrite(RecordReceiver recordReceiver) {
     @Override
     public void post() {
         try {
-            writerManager.flush(writerManager.createBatchLabel());
+            writerManager.close();
         } catch (Exception e) {
             throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
         }
diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java
index b4dbb0a369..4229e2563f 100644
--- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java
+++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java
@@ -32,6 +32,7 @@ public enum StreamLoadFormat {
     private static final String KEY_POST_SQL = "postSql";
     private static final String KEY_JDBC_URL = "jdbcUrl";
     private static final String KEY_LOAD_URL = "loadUrl";
+    private static final String KEY_FLUSH_QUEUE_LENGTH = "flushQueueLength";
     private static final String KEY_LOAD_PROPS = "loadProps";
 
     private final Configuration options;
@@ -96,6 +97,11 @@ public int getBatchRows() {
     public long getBatchSize() {
         return BATCH_BYTES;
     }
+    
+    public int getFlushQueueLength() {
+        Integer len = options.getInt(KEY_FLUSH_QUEUE_LENGTH);
+        return null == len ? 
1 : len; + } public StreamLoadFormat getStreamLoadFormat() { Map loadProps = getLoadProps(); diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisFlushTuple.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisFlushTuple.java new file mode 100644 index 0000000000..24bcc9c6c0 --- /dev/null +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisFlushTuple.java @@ -0,0 +1,20 @@ +package com.dorisdb.connector.datax.plugin.writer.doriswriter.manager; + +import java.util.List; + +public class DorisFlushTuple { + + private String label; + private Long bytes; + private List rows; + + public DorisFlushTuple(String label, Long bytes, List rows) { + this.label = label; + this.bytes = bytes; + this.rows = rows; + } + + public String getLabel() { return label; } + public Long getBytes() { return bytes; } + public List getRows() { return rows; } +} \ No newline at end of file diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java index 68621d8f8b..7ebfced603 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java @@ -37,7 +37,7 @@ public DorisStreamLoadVisitor(DorisWriterOptions writerOptions) { this.writerOptions = writerOptions; } - public void doStreamLoad(String label, List labeledRows) throws IOException { + public void doStreamLoad(DorisFlushTuple flushData) throws IOException { String host = getAvailableHost(); if (null == host) { throw new IOException("None of the host in `load_url` could be connected."); @@ -49,7 +49,8 @@ public void doStreamLoad(String label, List labeledRows) throws IOExcept .append(writerOptions.getTable()) .append("/_stream_load") .toString(); - Map loadResult = doHttpPut(loadUrl, label, joinRows(labeledRows)); + LOG.debug(String.format("Start to join batch data: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel())); + Map loadResult = doHttpPut(loadUrl, flushData.getLabel(), joinRows(flushData.getRows())); final String keyStatus = "Status"; if (null == loadResult || !loadResult.containsKey(keyStatus)) { throw new IOException("Unable to flush data to doris: unknown result status."); diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java index da24448f13..d7e9ad33fe 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java @@ -7,8 +7,10 @@ import java.util.ArrayList; import java.util.List; import java.util.UUID; +import java.util.concurrent.LinkedBlockingDeque; import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions; +import com.google.common.base.Strings; public class DorisWriterManager { @@ -22,10 +24,13 @@ public class DorisWriterManager { private long batchSize = 0; private volatile boolean closed = false; private 
volatile Exception flushException; + private final LinkedBlockingDeque flushQueue; public DorisWriterManager(DorisWriterOptions writerOptions) { this.writerOptions = writerOptions; this.dorisStreamLoadVisitor = new DorisStreamLoadVisitor(writerOptions); + flushQueue = new LinkedBlockingDeque<>(writerOptions.getFlushQueueLength()); + this.startAsyncFlushing(); } public final synchronized void writeRecord(String record) throws IOException { @@ -35,50 +40,42 @@ public final synchronized void writeRecord(String record) throws IOException { batchCount++; batchSize += record.getBytes().length; if (batchCount >= writerOptions.getBatchRows() || batchSize >= writerOptions.getBatchSize()) { - flush(createBatchLabel()); + String label = createBatchLabel(); + LOG.debug(String.format("Doris buffer Sinking triggered: rows[%d] label[%s].", batchCount, label)); + flush(label, false); } } catch (Exception e) { throw new IOException("Writing records to Doris failed.", e); } } - public synchronized void flush(String label) throws IOException { + public synchronized void flush(String label, boolean waitUtilDone) throws Exception { checkFlushException(); if (batchCount == 0) { + if (waitUtilDone) { + waitAsyncFlushingDone(); + } return; } - for (int i = 0; i <= writerOptions.getMaxRetries(); i++) { - try { - tryToFlush(label); - buffer.clear(); - batchCount = 0; - batchSize = 0; - break; - } catch (IOException e) { - LOG.warn("Failed to flush batch data to doris, retry times = {}", i, e); - if (i >= writerOptions.getMaxRetries()) { - throw new IOException(e); - } - try { - Thread.sleep(1000l * (i + 1)); - } catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - throw new IOException("Unable to flush, interrupted while doing another attempt", e); - } - } + flushQueue.put(new DorisFlushTuple(label, batchSize, new ArrayList<>(buffer))); + if (waitUtilDone) { + // wait the last flush + waitAsyncFlushingDone(); } + buffer.clear(); + batchCount = 0; + batchSize = 0; } public synchronized void close() { if (!closed) { - closed = true; - - if (batchCount > 0) { - try { - flush(createBatchLabel()); - } catch (Exception e) { - throw new RuntimeException("Writing records to Doris failed.", e); - } + closed = true; + try { + String label = createBatchLabel(); + if (batchCount > 0) LOG.debug(String.format("Doris Sink is about to close: label[%s].", label)); + flush(label, true); + } catch (Exception e) { + throw new RuntimeException("Writing records to Doris failed.", e); } } checkFlushException(); @@ -88,18 +85,55 @@ public String createBatchLabel() { return UUID.randomUUID().toString(); } - public List getBufferedBatchList() { - return buffer; + private void startAsyncFlushing() { + // start flush thread + Thread flushThread = new Thread(new Runnable(){ + public void run() { + while(true) { + try { + asyncFlush(); + } catch (Exception e) { + flushException = e; + } + } + } + }); + flushThread.setDaemon(true); + flushThread.start(); } - public void setBufferedBatchList(List buffer) { - this.buffer.clear(); - this.buffer.addAll(buffer); + private void waitAsyncFlushingDone() throws InterruptedException { + // wait previous flushings + for (int i = 0; i <= writerOptions.getFlushQueueLength(); i++) { + flushQueue.put(new DorisFlushTuple("", 0l, null)); + } } - private void tryToFlush(String label) throws IOException { - // flush to Doris with stream load - dorisStreamLoadVisitor.doStreamLoad(label, buffer); + private void asyncFlush() throws Exception { + DorisFlushTuple flushData = flushQueue.take(); + if 
(Strings.isNullOrEmpty(flushData.getLabel())) { + return; + } + LOG.debug(String.format("Async stream load: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel())); + for (int i = 0; i <= writerOptions.getMaxRetries(); i++) { + try { + // flush to Doris with stream load + dorisStreamLoadVisitor.doStreamLoad(flushData); + LOG.info(String.format("Async stream load finished: label[%s].", flushData.getLabel())); + break; + } catch (Exception e) { + LOG.warn("Failed to flush batch data to doris, retry times = {}", i, e); + if (i >= writerOptions.getMaxRetries()) { + throw new IOException(e); + } + try { + Thread.sleep(1000l * (i + 1)); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IOException("Unable to flush, interrupted while doing another attempt", e); + } + } + } } private void checkFlushException() { From 048e40cfea80798f926293eb5c97bcd016a5eb90 Mon Sep 17 00:00:00 2001 From: fariel Date: Wed, 7 Apr 2021 15:23:47 +0800 Subject: [PATCH 09/50] add readme --- doriswriter/doc/doriswriter.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doriswriter/doc/doriswriter.md b/doriswriter/doc/doriswriter.md index c7aef09370..80b2229b11 100644 --- a/doriswriter/doc/doriswriter.md +++ b/doriswriter/doc/doriswriter.md @@ -169,8 +169,14 @@ DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。 ### 3.3 类型转换 默认传入的数据均会被转为字符串,并以`\t`作为列分隔符,`\n`作为行分隔符,组成`csv`文件进行StreamLoad导入操作。 +如需更改列分隔符, 则正确配置 `loadProps` 即可: +```json +"loadProps": { + "column_separator": "\\x01" +} +``` -如需更改导入格式为`json`, 则正确配置 `loadProps` 即可: +如需更改导入格式为`json`, 则正确配置 `loadProps` 即可: ```json "loadProps": { "format": "json", From cac8434bbbb9adb6d62dbfd94a836efc5ea389b2 Mon Sep 17 00:00:00 2001 From: fariel Date: Wed, 7 Apr 2021 15:54:21 +0800 Subject: [PATCH 10/50] make batch size smaller to satisfy be limitations --- .../datax/plugin/writer/doriswriter/DorisWriterOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java index 4229e2563f..bad4ed73d5 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java @@ -16,7 +16,7 @@ public class DorisWriterOptions implements Serializable { private static final long MEGA_BYTES_SCALE = KILO_BYTES_SCALE * KILO_BYTES_SCALE; private static final int MAX_RETRIES = 1; private static final int BATCH_ROWS = 500000; - private static final long BATCH_BYTES = 100 * MEGA_BYTES_SCALE; + private static final long BATCH_BYTES = 90 * MEGA_BYTES_SCALE; private static final String KEY_LOAD_PROPS_FORMAT = "format"; public enum StreamLoadFormat { From 342c044aaa4567a9709ca97ee23a9a75c6ad3962 Mon Sep 17 00:00:00 2001 From: fariel Date: Fri, 30 Apr 2021 10:55:02 +0800 Subject: [PATCH 11/50] add `maxBatchRows` `maxBatchSize` --- .../plugin/writer/doriswriter/DorisWriterOptions.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java index bad4ed73d5..9add926d94 100644 --- 
a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java @@ -31,6 +31,8 @@ public enum StreamLoadFormat { private static final String KEY_PRE_SQL = "preSql"; private static final String KEY_POST_SQL = "postSql"; private static final String KEY_JDBC_URL = "jdbcUrl"; + private static final String KEY_MAX_BATCH_ROWS = "maxBatchRows"; + private static final String KEY_MAX_BATCH_SIZE = "maxBatchSize"; private static final String KEY_LOAD_URL = "loadUrl"; private static final String KEY_FLUSH_QUEUE_LENGTH = "flushQueueLength"; private static final String KEY_LOAD_PROPS = "loadProps"; @@ -91,11 +93,13 @@ public int getMaxRetries() { } public int getBatchRows() { - return BATCH_ROWS; + Integer rows = options.getInt(KEY_MAX_BATCH_ROWS); + return null == rows ? BATCH_ROWS : rows; } public long getBatchSize() { - return BATCH_BYTES; + Long size = options.getLong(KEY_MAX_BATCH_SIZE); + return null == size ? BATCH_BYTES : size; } public int getFlushQueueLength() { From a8f70b1f2c69619e911338307e555d011100f869 Mon Sep 17 00:00:00 2001 From: fariel Date: Fri, 30 Apr 2021 18:31:25 +0800 Subject: [PATCH 12/50] add row_delimiter parameter --- doriswriter/doc/doriswriter.md | 19 ++++++- .../manager/DorisStreamLoadVisitor.java | 9 ++- .../doriswriter/row/DorisCsvSerializer.java | 49 +---------------- .../doriswriter/row/DorisDelimiterParser.java | 55 +++++++++++++++++++ 4 files changed, 82 insertions(+), 50 deletions(-) create mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisDelimiterParser.java diff --git a/doriswriter/doc/doriswriter.md b/doriswriter/doc/doriswriter.md index 80b2229b11..fbe6e4bbee 100644 --- a/doriswriter/doc/doriswriter.md +++ b/doriswriter/doc/doriswriter.md @@ -157,6 +157,22 @@ DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。 * 默认值:无
+* **maxBatchRows**
+
+    * 描述:单次StreamLoad导入的最大行数 <br />
+
+    * 必选:否 <br />
+
+    * 默认值:500000 (50W) <br />
+
+* **maxBatchSize**
+
+    * 描述:单次StreamLoad导入的最大字节数。<br />
+
+    * 必选:否 <br />
+
+    * 默认值:94371840(90M,与代码中 `BATCH_BYTES = 90 * MEGA_BYTES_SCALE` 的默认值一致)<br />
+
 * **loadProps**
 
     * 描述:StreamLoad 的请求参数,详情参照StreamLoad介绍页面。<br />
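
Taken together with the `loadProps` examples earlier in the series, a writer `parameter` block that exercises both batching knobs might look like the sketch below. The database, table, host addresses, and the 200000/67108864 (64M) thresholds are illustrative placeholders, not recommended values:

```json
"writer": {
    "name": "doriswriter",
    "parameter": {
        "username": "xxxx",
        "password": "xxxx",
        "database": "example_db",
        "table": "example_tbl",
        "column": ["k1", "k2", "v1", "v2"],
        "jdbcUrl": "jdbc:mysql://127.0.0.1:9030/",
        "loadUrl": ["127.0.0.1:8030"],
        "maxBatchRows": 200000,
        "maxBatchSize": 67108864,
        "loadProps": {
            "column_separator": "\\x01",
            "row_delimiter": "\\x02"
        }
    }
}
```

Whatever value is chosen for `maxBatchSize` should stay below the BE's stream-load body limit; that limit is the reason patch 10 above lowered the built-in default from 100M to 90M.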
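
The `\x01` / `\x02` escapes in `loadProps` are decoded by the delimiter parser this series introduces (`DorisDelimiterParser` in patch 12): a separator beginning with `\x` is read as hex bytes, anything else is taken literally, and a null or empty value falls back to the default. A condensed, runnable sketch of that decoding convention, with the real parser's input-validation branches omitted for brevity:

```java
// Minimal demo of the "\xNN" delimiter convention used by loadProps.
public class DelimiterDemo {
    private static final String HEX = "0123456789ABCDEF";

    static String parse(String sp, String defaultSp) {
        if (sp == null || sp.isEmpty()) {
            return defaultSp;                // fall back to the default separator
        }
        if (!sp.toUpperCase().startsWith("\\X")) {
            return sp;                       // plain literal separator
        }
        String hex = sp.substring(2);        // e.g. "\\x01" -> "01"
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < hex.length(); i += 2) {
            int hi = HEX.indexOf(Character.toUpperCase(hex.charAt(i)));
            int lo = HEX.indexOf(Character.toUpperCase(hex.charAt(i + 1)));
            out.append((char) ((hi << 4) | lo));
        }
        return out.toString();
    }

    public static void main(String[] args) {
        System.out.println((int) parse("\\x01", "\t").charAt(0)); // 1
        System.out.println(parse(null, "\t").equals("\t"));       // true
        System.out.println(parse("|", "\t"));                     // |
    }
}
```

Running it prints `1`, `true`, `|`: the escape `\x01` becomes the single byte 0x01, while ordinary separators pass through untouched.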
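
One step back: the asynchronous flushing wired into `DorisWriterManager` near the top of this range relies on a bounded `LinkedBlockingDeque` consumed by a daemon thread, with empty-label sentinel tuples implementing the `waitUtilDone` path. The toy model below reproduces just that handshake, with plain `String` labels standing in for flush tuples; the queue capacity and sleep time are arbitrary:

```java
import java.util.concurrent.LinkedBlockingDeque;

// Toy model of the writer's async-flush handshake: real batches carry a
// non-empty label; empty-label sentinels are put queueCapacity + 1 times,
// which can only complete once every earlier batch has been dequeued.
public class FlushQueueDemo {
    static final int QUEUE_CAPACITY = 1;
    static final LinkedBlockingDeque<String> queue = new LinkedBlockingDeque<>(QUEUE_CAPACITY);

    public static void main(String[] args) throws InterruptedException {
        Thread consumer = new Thread(() -> {
            while (true) {
                try {
                    String label = queue.take();
                    if (!label.isEmpty()) {
                        Thread.sleep(100);   // stand-in for the stream-load call
                        System.out.println("flushed " + label);
                    }
                } catch (InterruptedException e) {
                    return;
                }
            }
        });
        consumer.setDaemon(true);
        consumer.start();

        queue.put("batch-1");
        queue.put("batch-2");
        // "wait until done": one sentinel per queue slot, plus one more that
        // is only accepted after the last real batch has left the queue.
        for (int i = 0; i <= QUEUE_CAPACITY; i++) {
            queue.put("");
        }
        System.out.println("all batches flushed");
    }
}
```

Because the consumer is single-threaded and processes items in order, the final sentinel `put` can only return after the last real batch has already finished its (simulated) stream load, which is exactly the guarantee the manager's close path needs.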
@@ -172,7 +188,8 @@ DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。 如需更改列分隔符, 则正确配置 `loadProps` 即可: ```json "loadProps": { - "column_separator": "\\x01" + "column_separator": "\\x01", + "row_delimiter": "\\x02" } ``` diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java index 7ebfced603..1598d8f4cd 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java @@ -3,10 +3,12 @@ import java.io.IOException; import java.net.HttpURLConnection; import java.net.URL; +import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import com.alibaba.fastjson.JSON; import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions; +import com.dorisdb.connector.datax.plugin.writer.doriswriter.row.DorisDelimiterParser; import org.apache.commons.codec.binary.Base64; import org.apache.http.HttpEntity; @@ -93,7 +95,12 @@ private boolean tryHttpConnection(String host) { private byte[] joinRows(List rows) { if (DorisWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { - return String.join("\n", rows).getBytes(StandardCharsets.UTF_8); + Map props = writerOptions.getLoadProps(); + String lineDelimiter = "\n"; + if (null != props && props.containsKey("row_delimiter")) { + lineDelimiter = DorisDelimiterParser.parse(String.valueOf(props.get("row_delimiter")), "\n"); + } + return (String.join(lineDelimiter, rows) + lineDelimiter).getBytes(StandardCharsets.UTF_8); } if (DorisWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { return new StringBuilder("[").append(String.join(",", rows)).append("]").toString().getBytes(StandardCharsets.UTF_8); diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java index 866c8d87ce..862e0b7356 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java @@ -10,12 +10,10 @@ public class DorisCsvSerializer extends DorisBaseSerializer implements DorisISer private static final long serialVersionUID = 1L; - private final String HEX_STRING = "0123456789ABCDEF"; - private final String columnSeparator; public DorisCsvSerializer(String sp) { - this.columnSeparator = parseByteSeparator(sp); + this.columnSeparator = DorisDelimiterParser.parse(sp, "\t"); } @Override @@ -31,49 +29,4 @@ public String serialize(Record row) { return sb.toString(); } - private String parseByteSeparator(String sp) { - if (Strings.isNullOrEmpty(sp)) { - // `\t` by default - return "\t"; - } - if (!sp.toUpperCase().startsWith("\\X")) { - return sp; - } - String hexStr = sp.substring(2); - // check hex str - if (hexStr.isEmpty()) { - throw new RuntimeException("Failed to parse column_separator: `Hex str is empty`"); - } - if (hexStr.length() % 2 != 0) { - throw new RuntimeException("Failed to parse column_separator: `Hex str length error`"); - } - for (char hexChar : hexStr.toUpperCase().toCharArray()) { - if 
(HEX_STRING.indexOf(hexChar) == -1) { - throw new RuntimeException("Failed to parse column_separator: `Hex str format error`"); - } - } - // transform to separator - StringWriter writer = new StringWriter(); - for (byte b : hexStrToBytes(hexStr)) { - writer.append((char) b); - } - return writer.toString(); - } - - private byte[] hexStrToBytes(String hexStr) { - String upperHexStr = hexStr.toUpperCase(); - int length = upperHexStr.length() / 2; - char[] hexChars = upperHexStr.toCharArray(); - byte[] bytes = new byte[length]; - for (int i = 0; i < length; i++) { - int pos = i * 2; - bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1])); - } - return bytes; - } - - private byte charToByte(char c) { - return (byte) HEX_STRING.indexOf(c); - } - } diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisDelimiterParser.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisDelimiterParser.java new file mode 100644 index 0000000000..3fd58fa5d9 --- /dev/null +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisDelimiterParser.java @@ -0,0 +1,55 @@ +package com.dorisdb.connector.datax.plugin.writer.doriswriter.row; + +import java.io.StringWriter; + +import com.google.common.base.Strings; + +public class DorisDelimiterParser { + + private static final String HEX_STRING = "0123456789ABCDEF"; + + public static String parse(String sp, String dSp) throws RuntimeException { + if (Strings.isNullOrEmpty(sp)) { + return dSp; + } + if (!sp.toUpperCase().startsWith("\\X")) { + return sp; + } + String hexStr = sp.substring(2); + // check hex str + if (hexStr.isEmpty()) { + throw new RuntimeException("Failed to parse delimiter: `Hex str is empty`"); + } + if (hexStr.length() % 2 != 0) { + throw new RuntimeException("Failed to parse delimiter: `Hex str length error`"); + } + for (char hexChar : hexStr.toUpperCase().toCharArray()) { + if (HEX_STRING.indexOf(hexChar) == -1) { + throw new RuntimeException("Failed to parse delimiter: `Hex str format error`"); + } + } + // transform to separator + StringWriter writer = new StringWriter(); + for (byte b : hexStrToBytes(hexStr)) { + writer.append((char) b); + } + return writer.toString(); + } + + private static byte[] hexStrToBytes(String hexStr) { + String upperHexStr = hexStr.toUpperCase(); + int length = upperHexStr.length() / 2; + char[] hexChars = upperHexStr.toCharArray(); + byte[] bytes = new byte[length]; + for (int i = 0; i < length; i++) { + int pos = i * 2; + bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1])); + } + return bytes; + } + + private static byte charToByte(char c) { + return (byte) HEX_STRING.indexOf(c); + } + +} From b936f0562a1a9aaf3621143b5551c1d44da37bd0 Mon Sep 17 00:00:00 2001 From: fariel Date: Wed, 19 May 2021 17:26:20 +0800 Subject: [PATCH 13/50] convert bit && bool -> int --- .../plugin/writer/doriswriter/row/DorisBaseSerializer.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java index a46fc4421f..8e77d71950 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java +++ 
b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java @@ -33,6 +33,9 @@ protected String fieldConvertion(Column col) { if (null == col.getRawData()) { return null; } + if (Type.BOOL == col.getType()) { + return String.valueOf(col.asLong()); + } if (Type.DATE != col.getType()) { return col.asString(); } From 0d0dd3b75c6f3bf612c0b94c83a4dfb35c999c24 Mon Sep 17 00:00:00 2001 From: fariel Date: Fri, 21 May 2021 13:23:11 +0800 Subject: [PATCH 14/50] optimize joinrows function --- .../manager/DorisStreamLoadVisitor.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java index 1598d8f4cd..8568c7b66f 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java @@ -100,10 +100,25 @@ private byte[] joinRows(List rows) { if (null != props && props.containsKey("row_delimiter")) { lineDelimiter = DorisDelimiterParser.parse(String.valueOf(props.get("row_delimiter")), "\n"); } - return (String.join(lineDelimiter, rows) + lineDelimiter).getBytes(StandardCharsets.UTF_8); + StringBuilder sb = new StringBuilder(); + for (String row : rows) { + sb.append(row).append(lineDelimiter); + } + return sb.toString().getBytes(StandardCharsets.UTF_8); } if (DorisWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { - return new StringBuilder("[").append(String.join(",", rows)).append("]").toString().getBytes(StandardCharsets.UTF_8); + StringBuilder sb = new StringBuilder(); + sb.append("["); + boolean isFirstElement = true; + for (String row : rows) { + if (!isFirstElement) { + sb.append(","); + } + sb.append(row); + isFirstElement = false; + } + sb.append("]"); + return sb.toString().getBytes(StandardCharsets.UTF_8); } throw new RuntimeException("Failed to join rows data, unsupported `format` from stream load properties:"); } From f4d25be8fee14ad87428522edd5d88503d6cff88 Mon Sep 17 00:00:00 2001 From: fariel Date: Thu, 1 Jul 2021 11:17:17 +0800 Subject: [PATCH 15/50] use internal date convertion --- .../doriswriter/row/DorisBaseSerializer.java | 41 +------------------ 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java index 8e77d71950..9876b0f0c1 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java @@ -1,34 +1,10 @@ package com.dorisdb.connector.datax.plugin.writer.doriswriter.row; -import java.util.TimeZone; - import com.alibaba.datax.common.element.Column; -import com.alibaba.datax.common.element.DateColumn; import com.alibaba.datax.common.element.Column.Type; -import com.alibaba.datax.common.util.Configuration; - -import org.apache.commons.lang3.time.DateFormatUtils; public class DorisBaseSerializer { - static String datetimeFormat = "yyyy-MM-dd HH:mm:ss"; - - static 
String dateFormat = "yyyy-MM-dd"; - - static String timeFormat = "HH:mm:ss"; - - static String timeZone = "GMT+8"; - - static TimeZone timeZoner = TimeZone.getTimeZone(DorisBaseSerializer.timeZone); - - static void init(final Configuration configuration) { - DorisBaseSerializer.datetimeFormat = configuration.getString("common.column.datetimeFormat", datetimeFormat); - DorisBaseSerializer.timeFormat = configuration.getString("common.column.timeFormat", timeFormat); - DorisBaseSerializer.dateFormat = configuration.getString("common.column.dateFormat", dateFormat); - DorisBaseSerializer.timeZone = configuration.getString("common.column.timeZone", DorisBaseSerializer.timeZone); - DorisBaseSerializer.timeZoner = TimeZone.getTimeZone(DorisBaseSerializer.timeZone); - } - protected String fieldConvertion(Column col) { if (null == col.getRawData()) { return null; @@ -36,20 +12,7 @@ protected String fieldConvertion(Column col) { if (Type.BOOL == col.getType()) { return String.valueOf(col.asLong()); } - if (Type.DATE != col.getType()) { - return col.asString(); - } - DateColumn.DateType type = ((DateColumn)col).getSubType(); - if (type == DateColumn.DateType.DATE) { - return DateFormatUtils.format(col.asDate(), DorisBaseSerializer.dateFormat, DorisBaseSerializer.timeZoner); - } - if (type == DateColumn.DateType.TIME) { - return DateFormatUtils.format(col.asDate(), DorisBaseSerializer.timeFormat, DorisBaseSerializer.timeZoner); - } - if (type == DateColumn.DateType.DATETIME) { - return DateFormatUtils.format(col.asDate(), DorisBaseSerializer.datetimeFormat, DorisBaseSerializer.timeZoner); - } - return null; + return col.asString(); } - + } From fd42a23b686425e2d1c456aeafec0798965974da Mon Sep 17 00:00:00 2001 From: fariel Date: Thu, 22 Jul 2021 18:52:47 +0800 Subject: [PATCH 16/50] optimize heap memory usage --- .../manager/DorisStreamLoadVisitor.java | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java index 8568c7b66f..f792bfafa3 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java @@ -3,7 +3,7 @@ import java.io.IOException; import java.net.HttpURLConnection; import java.net.URL; -import java.net.URLEncoder; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import com.alibaba.fastjson.JSON; @@ -52,7 +52,7 @@ public void doStreamLoad(DorisFlushTuple flushData) throws IOException { .append("/_stream_load") .toString(); LOG.debug(String.format("Start to join batch data: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel())); - Map loadResult = doHttpPut(loadUrl, flushData.getLabel(), joinRows(flushData.getRows())); + Map loadResult = doHttpPut(loadUrl, flushData.getLabel(), joinRows(flushData.getRows(), flushData.getBytes().intValue())); final String keyStatus = "Status"; if (null == loadResult || !loadResult.containsKey(keyStatus)) { throw new IOException("Unable to flush data to doris: unknown result status."); @@ -93,32 +93,32 @@ private boolean tryHttpConnection(String host) { } } - private byte[] joinRows(List rows) { + private byte[] joinRows(List rows, int totalBytes) 
{ if (DorisWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { Map props = writerOptions.getLoadProps(); - String lineDelimiter = "\n"; - if (null != props && props.containsKey("row_delimiter")) { - lineDelimiter = DorisDelimiterParser.parse(String.valueOf(props.get("row_delimiter")), "\n"); - } - StringBuilder sb = new StringBuilder(); + ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size()); + byte[] lineDelimiter = DorisDelimiterParser.parse(String.valueOf(props.get("row_delimiter")), "\n").getBytes(StandardCharsets.UTF_8); for (String row : rows) { - sb.append(row).append(lineDelimiter); + bos.put(row.getBytes(StandardCharsets.UTF_8)); + bos.put(lineDelimiter); } - return sb.toString().getBytes(StandardCharsets.UTF_8); + return bos.array(); } + if (DorisWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { - StringBuilder sb = new StringBuilder(); - sb.append("["); + ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() + 1); + bos.put("[".getBytes(StandardCharsets.UTF_8)); + byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8); boolean isFirstElement = true; for (String row : rows) { if (!isFirstElement) { - sb.append(","); + bos.put(jsonDelimiter); } - sb.append(row); + bos.put(row.getBytes(StandardCharsets.UTF_8)); isFirstElement = false; } - sb.append("]"); - return sb.toString().getBytes(StandardCharsets.UTF_8); + bos.put("]".getBytes(StandardCharsets.UTF_8)); + return bos.array(); } throw new RuntimeException("Failed to join rows data, unsupported `format` from stream load properties:"); } From f9391a78e713d30e13d26735d9614b8e50143cc0 Mon Sep 17 00:00:00 2001 From: fariel Date: Fri, 23 Jul 2021 14:38:43 +0800 Subject: [PATCH 17/50] optimize buffer allocated logic --- .../writer/doriswriter/manager/DorisStreamLoadVisitor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java index f792bfafa3..0bea3bdc5c 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java @@ -106,7 +106,7 @@ private byte[] joinRows(List rows, int totalBytes) { } if (DorisWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { - ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() + 1); + ByteBuffer bos = ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 
2 : rows.size() + 1)); bos.put("[".getBytes(StandardCharsets.UTF_8)); byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8); boolean isFirstElement = true; From 293286ea142da8904988ef64c888e1f2cccea719 Mon Sep 17 00:00:00 2001 From: fariel Date: Fri, 13 Aug 2021 10:29:03 +0800 Subject: [PATCH 18/50] remove ` from columns --- .../datax/plugin/writer/doriswriter/DorisWriterOptions.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java index 9add926d94..1b3f525f72 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java @@ -8,6 +8,7 @@ import java.util.List; import java.util.Map; +import java.util.stream.Collectors; public class DorisWriterOptions implements Serializable { @@ -73,7 +74,7 @@ public List getLoadUrlList() { } public List getColumns() { - return options.getList(KEY_COLUMN, String.class); + return options.getList(KEY_COLUMN, String.class).stream().map(str -> str.replace("`", "")).collect(Collectors.toList()); } public List getPreSqlList() { From 70975a0ed54c0f7ae3d067dc2fd3f414f2b105c4 Mon Sep 17 00:00:00 2001 From: fariel Date: Fri, 13 Aug 2021 10:29:22 +0800 Subject: [PATCH 19/50] fix bufferoverflow --- .../writer/doriswriter/manager/DorisStreamLoadVisitor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java index 0bea3bdc5c..7556f72e20 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java +++ b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java @@ -96,8 +96,8 @@ private boolean tryHttpConnection(String host) { private byte[] joinRows(List rows, int totalBytes) { if (DorisWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { Map props = writerOptions.getLoadProps(); - ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size()); byte[] lineDelimiter = DorisDelimiterParser.parse(String.valueOf(props.get("row_delimiter")), "\n").getBytes(StandardCharsets.UTF_8); + ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length); for (String row : rows) { bos.put(row.getBytes(StandardCharsets.UTF_8)); bos.put(lineDelimiter); From 881ab49359e258fe675e8bf223a09e90605d08dd Mon Sep 17 00:00:00 2001 From: fariel Date: Tue, 31 Aug 2021 20:34:30 +0800 Subject: [PATCH 20/50] add starrocks writer --- .../doriswriter/row/DorisISerializer.java | 11 ----- .../row/DorisSerializerFactory.java | 22 ---------- doriswriter/src/main/resources/plugin.json | 6 --- package.xml | 2 +- pom.xml | 2 +- .../doc/starrockswriter.md | 20 +++++----- {doriswriter => starrockswriter}/pom.xml | 10 ++--- .../src/main/assembly/package.xml | 8 ++-- .../writer/doriswriter/StarRocksWriter.java | 40 +++++++++---------- .../doriswriter/StarRocksWriterOptions.java | 6 +-- .../manager/StarRocksFlushTuple.java | 6 +-- .../manager/StarRocksStreamLoadVisitor.java | 26 ++++++------ 
.../manager/StarRocksWriterManager.java | 40 +++++++++---------- .../row/StarRocksBaseSerializer.java | 4 +- .../row/StarRocksCsvSerializer.java | 8 ++-- .../row/StarRocksDelimiterParser.java | 4 +- .../doriswriter/row/StarRocksISerializer.java | 11 +++++ .../row/StarRocksJsonSerializer.java | 6 +-- .../row/StarRocksSerializerFactory.java | 22 ++++++++++ .../doriswriter/util/StarRocksWriterUtil.java | 18 ++++----- .../src/main/resources/plugin.json | 6 +++ .../main/resources/plugin_job_template.json | 2 +- 22 files changed, 140 insertions(+), 140 deletions(-) delete mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisISerializer.java delete mode 100644 doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisSerializerFactory.java delete mode 100755 doriswriter/src/main/resources/plugin.json rename doriswriter/doc/doriswriter.md => starrockswriter/doc/starrockswriter.md (84%) rename {doriswriter => starrockswriter}/pom.xml (93%) rename {doriswriter => starrockswriter}/src/main/assembly/package.xml (76%) rename doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java => starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriter.java (73%) rename doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java => starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriterOptions.java (96%) rename doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisFlushTuple.java => starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksFlushTuple.java (64%) rename doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java => starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksStreamLoadVisitor.java (84%) rename doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java => starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksWriterManager.java (70%) rename doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java => starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksBaseSerializer.java (76%) rename doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java => starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksCsvSerializer.java (69%) rename doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisDelimiterParser.java => starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksDelimiterParser.java (93%) create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksISerializer.java rename doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java => starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksJsonSerializer.java (75%) create mode 100644 
starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksSerializerFactory.java rename doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/util/DorisWriterUtil.java => starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/util/StarRocksWriterUtil.java (79%) create mode 100755 starrockswriter/src/main/resources/plugin.json rename {doriswriter => starrockswriter}/src/main/resources/plugin_job_template.json (88%) diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisISerializer.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisISerializer.java deleted file mode 100644 index 92a732885f..0000000000 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisISerializer.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.dorisdb.connector.datax.plugin.writer.doriswriter.row; - -import java.io.Serializable; - -import com.alibaba.datax.common.element.Record; - -public interface DorisISerializer extends Serializable { - - String serialize(Record row); - -} diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisSerializerFactory.java b/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisSerializerFactory.java deleted file mode 100644 index 0816399e01..0000000000 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisSerializerFactory.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.dorisdb.connector.datax.plugin.writer.doriswriter.row; - -import java.util.Map; - -import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions; - -public class DorisSerializerFactory { - - private DorisSerializerFactory() {} - - public static DorisISerializer createSerializer(DorisWriterOptions writerOptions) { - if (DorisWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { - Map props = writerOptions.getLoadProps(); - return new DorisCsvSerializer(null == props || !props.containsKey("column_separator") ? null : String.valueOf(props.get("column_separator"))); - } - if (DorisWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { - return new DorisJsonSerializer(writerOptions.getColumns()); - } - throw new RuntimeException("Failed to create row serializer, unsupported `format` from stream load properties."); - } - -} diff --git a/doriswriter/src/main/resources/plugin.json b/doriswriter/src/main/resources/plugin.json deleted file mode 100755 index 081ddace9d..0000000000 --- a/doriswriter/src/main/resources/plugin.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "doriswriter", - "class": "com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriter", - "description": "useScene: prod. mechanism: DorisStreamLoad. warn: The more you know about the database, the less problems you encounter.", - "developer": "dorisdb" -} \ No newline at end of file diff --git a/package.xml b/package.xml index 347ee81a5c..4a2e1fc888 100755 --- a/package.xml +++ b/package.xml @@ -190,7 +190,7 @@ datax
- doriswriter/target/datax/ + starrockswriter/target/datax/ **/*.* diff --git a/pom.xml b/pom.xml index 96ad433310..5f194b06ed 100644 --- a/pom.xml +++ b/pom.xml @@ -71,7 +71,7 @@ mysqlwriter - doriswriter + starrockswriter drdswriter odpswriter txtfilewriter diff --git a/doriswriter/doc/doriswriter.md b/starrockswriter/doc/starrockswriter.md similarity index 84% rename from doriswriter/doc/doriswriter.md rename to starrockswriter/doc/starrockswriter.md index fbe6e4bbee..f471b75212 100644 --- a/doriswriter/doc/doriswriter.md +++ b/starrockswriter/doc/starrockswriter.md @@ -1,4 +1,4 @@ -# DataX DorisWriter +# DataX StarRocksWriter --- @@ -6,19 +6,19 @@ ## 1 快速介绍 -DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。在底层实现上, DorisWriter 通过Streamload以csv格式导入数据至Doris。 +StarRocksWriter 插件实现了写入数据到 StarRocks 主库的目的表的功能。在底层实现上, StarRocksWriter 通过Streamload以csv格式导入数据至StarRocks。 ## 2 实现原理 - DorisWriter 通过Streamload以csv格式导入数据至Doris, 内部将`reader`读取的数据进行缓存后批量导入至Doris,以提高写入性能。 + StarRocksWriter 通过Streamload以csv格式导入数据至StarRocks, 内部将`reader`读取的数据进行缓存后批量导入至StarRocks,以提高写入性能。 ## 3 功能说明 ### 3.1 配置样例 -* 这里使用一份从内存Mysql读取数据后导入至Doris。 +* 这里使用一份从内存Mysql读取数据后导入至StarRocks。 ```json { @@ -57,7 +57,7 @@ DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。 } }, "writer": { - "name": "doriswriter", + "name": "starrockswriter", "parameter": { "username": "xxxx", "password": "xxxx", @@ -83,7 +83,7 @@ DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。 * **username** - * 描述:Doris数据库的用户名
+ * 描述:StarRocks数据库的用户名
* 必选:是
@@ -91,7 +91,7 @@ DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。 * **password** - * 描述:Doris数据库的密码
+ * 描述:StarRocks数据库的密码
* 必选:是
@@ -99,7 +99,7 @@ DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。 * **database** - * 描述:Doris表的数据库名称。 + * 描述:StarRocks表的数据库名称。 * 必选:是
@@ -107,7 +107,7 @@ DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。 * **table** - * 描述:Doris表的表名称。 + * 描述:StarRocks表的表名称。 * 必选:是
@@ -115,7 +115,7 @@ DorisWriter 插件实现了写入数据到 Doris 主库的目的表的功能。 * **loadUrl** - * 描述:Doris FE的地址用于Streamload,可以为多个fe地址,`fe_ip:fe_http_port`。 + * 描述:StarRocks FE的地址用于Streamload,可以为多个fe地址,`fe_ip:fe_http_port`。 * 必选:是
diff --git a/doriswriter/pom.xml b/starrockswriter/pom.xml similarity index 93% rename from doriswriter/pom.xml rename to starrockswriter/pom.xml index 1524e3586a..f36fc4b578 100755 --- a/doriswriter/pom.xml +++ b/starrockswriter/pom.xml @@ -6,8 +6,8 @@ datax-all 0.0.1-SNAPSHOT - doriswriter - doriswriter + starrockswriter + starrockswriter jar @@ -100,15 +100,15 @@ com.alibaba.fastjson - com.dorisdb.shade.com.alibaba.fastjson + com.starrocks.shade.com.alibaba.fastjson org.apache.http - com.dorisdb.shade.org.apache.http + com.starrocks.shade.org.apache.http org.apache.commons - com.dorisdb.shade.org.apache.commons + com.starrocks.shade.org.apache.commons diff --git a/doriswriter/src/main/assembly/package.xml b/starrockswriter/src/main/assembly/package.xml similarity index 76% rename from doriswriter/src/main/assembly/package.xml rename to starrockswriter/src/main/assembly/package.xml index 2e0880e1e4..79ca7be84a 100755 --- a/doriswriter/src/main/assembly/package.xml +++ b/starrockswriter/src/main/assembly/package.xml @@ -14,21 +14,21 @@ plugin.json plugin_job_template.json - plugin/writer/doriswriter + plugin/writer/starrockswriter
target/ - doriswriter-0.0.1-SNAPSHOT.jar + starrockswriter-0.0.1-SNAPSHOT.jar - plugin/writer/doriswriter + plugin/writer/starrockswriter false - plugin/writer/doriswriter/libs + plugin/writer/starrockswriter/libs runtime diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriter.java similarity index 73% rename from doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java rename to starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriter.java index 8b3c414d92..666a99d962 100755 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriter.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriter.java @@ -1,4 +1,4 @@ -package com.dorisdb.connector.datax.plugin.writer.doriswriter; +package com.starrocks.connector.datax.plugin.writer.starrockswriter; import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.exception.DataXException; @@ -8,10 +8,10 @@ import com.alibaba.datax.plugin.rdbms.util.DBUtil; import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; import com.alibaba.datax.plugin.rdbms.util.DataBaseType; -import com.dorisdb.connector.datax.plugin.writer.doriswriter.manager.DorisWriterManager; -import com.dorisdb.connector.datax.plugin.writer.doriswriter.row.DorisISerializer; -import com.dorisdb.connector.datax.plugin.writer.doriswriter.row.DorisSerializerFactory; -import com.dorisdb.connector.datax.plugin.writer.doriswriter.util.DorisWriterUtil; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.manager.StarRocksWriterManager; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksISerializer; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksSerializerFactory; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.util.StarRocksWriterUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -20,26 +20,26 @@ import java.util.ArrayList; import java.util.List; -public class DorisWriter extends Writer { +public class StarRocksWriter extends Writer { public static class Job extends Writer.Job { private static final Logger LOG = LoggerFactory.getLogger(Job.class); private Configuration originalConfig = null; - private DorisWriterOptions options; + private StarRocksWriterOptions options; @Override public void init() { this.originalConfig = super.getPluginJobConf(); - options = new DorisWriterOptions(super.getPluginJobConf()); + options = new StarRocksWriterOptions(super.getPluginJobConf()); options.doPretreatment(); } @Override public void preCheck(){ this.init(); - DorisWriterUtil.preCheckPrePareSQL(options); - DorisWriterUtil.preCheckPostSQL(options); + StarRocksWriterUtil.preCheckPrePareSQL(options); + StarRocksWriterUtil.preCheckPostSQL(options); } @Override @@ -47,11 +47,11 @@ public void prepare() { String username = options.getUsername(); String password = options.getPassword(); String jdbcUrl = options.getJdbcUrl(); - List renderedPreSqls = DorisWriterUtil.renderPreOrPostSqls(options.getPreSqlList(), options.getTable()); + List renderedPreSqls = StarRocksWriterUtil.renderPreOrPostSqls(options.getPreSqlList(), options.getTable()); if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { Connection conn = 
DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password); LOG.info("Begin to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPreSqls), jdbcUrl); - DorisWriterUtil.executeSqls(conn, renderedPreSqls); + StarRocksWriterUtil.executeSqls(conn, renderedPreSqls); DBUtil.closeDBResources(null, null, conn); } } @@ -70,11 +70,11 @@ public void post() { String username = options.getUsername(); String password = options.getPassword(); String jdbcUrl = options.getJdbcUrl(); - List renderedPostSqls = DorisWriterUtil.renderPreOrPostSqls(options.getPostSqlList(), options.getTable()); + List renderedPostSqls = StarRocksWriterUtil.renderPreOrPostSqls(options.getPostSqlList(), options.getTable()); if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password); LOG.info("Begin to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPostSqls), jdbcUrl); - DorisWriterUtil.executeSqls(conn, renderedPostSqls); + StarRocksWriterUtil.executeSqls(conn, renderedPostSqls); DBUtil.closeDBResources(null, null, conn); } } @@ -86,15 +86,15 @@ public void destroy() { } public static class Task extends Writer.Task { - private DorisWriterManager writerManager; - private DorisWriterOptions options; - private DorisISerializer rowSerializer; + private StarRocksWriterManager writerManager; + private StarRocksWriterOptions options; + private StarRocksISerializer rowSerializer; @Override public void init() { - options = new DorisWriterOptions(super.getPluginJobConf()); - writerManager = new DorisWriterManager(options); - rowSerializer = DorisSerializerFactory.createSerializer(options); + options = new StarRocksWriterOptions(super.getPluginJobConf()); + writerManager = new StarRocksWriterManager(options); + rowSerializer = StarRocksSerializerFactory.createSerializer(options); } @Override diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriterOptions.java similarity index 96% rename from doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java rename to starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriterOptions.java index 1b3f525f72..5180512f59 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/DorisWriterOptions.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriterOptions.java @@ -1,4 +1,4 @@ -package com.dorisdb.connector.datax.plugin.writer.doriswriter; +package com.starrocks.connector.datax.plugin.writer.starrockswriter; import java.io.Serializable; @@ -10,7 +10,7 @@ import java.util.Map; import java.util.stream.Collectors; -public class DorisWriterOptions implements Serializable { +public class StarRocksWriterOptions implements Serializable { private static final long serialVersionUID = 1l; private static final long KILO_BYTES_SCALE = 1024l; @@ -40,7 +40,7 @@ public enum StreamLoadFormat { private final Configuration options; - public DorisWriterOptions(Configuration options) { + public StarRocksWriterOptions(Configuration options) { this.options = options; } diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisFlushTuple.java 
b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksFlushTuple.java similarity index 64% rename from doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisFlushTuple.java rename to starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksFlushTuple.java index 24bcc9c6c0..cd8c663b18 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisFlushTuple.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksFlushTuple.java @@ -1,14 +1,14 @@ -package com.dorisdb.connector.datax.plugin.writer.doriswriter.manager; +package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager; import java.util.List; -public class DorisFlushTuple { +public class StarRocksFlushTuple { private String label; private Long bytes; private List rows; - public DorisFlushTuple(String label, Long bytes, List rows) { + public StarRocksFlushTuple(String label, Long bytes, List rows) { this.label = label; this.bytes = bytes; this.rows = rows; diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksStreamLoadVisitor.java similarity index 84% rename from doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java rename to starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksStreamLoadVisitor.java index 7556f72e20..84fc5200d0 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisStreamLoadVisitor.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksStreamLoadVisitor.java @@ -1,4 +1,4 @@ -package com.dorisdb.connector.datax.plugin.writer.doriswriter.manager; +package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager; import java.io.IOException; import java.net.HttpURLConnection; @@ -7,8 +7,8 @@ import java.nio.charset.StandardCharsets; import com.alibaba.fastjson.JSON; -import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions; -import com.dorisdb.connector.datax.plugin.writer.doriswriter.row.DorisDelimiterParser; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksDelimiterParser; import org.apache.commons.codec.binary.Base64; import org.apache.http.HttpEntity; @@ -28,18 +28,18 @@ import java.util.Map; -public class DorisStreamLoadVisitor { +public class StarRocksStreamLoadVisitor { - private static final Logger LOG = LoggerFactory.getLogger(DorisStreamLoadVisitor.class); + private static final Logger LOG = LoggerFactory.getLogger(StarRocksStreamLoadVisitor.class); - private final DorisWriterOptions writerOptions; + private final StarRocksWriterOptions writerOptions; private int pos; - public DorisStreamLoadVisitor(DorisWriterOptions writerOptions) { + public StarRocksStreamLoadVisitor(StarRocksWriterOptions writerOptions) { this.writerOptions = writerOptions; } - public void doStreamLoad(DorisFlushTuple flushData) throws IOException { + public void doStreamLoad(StarRocksFlushTuple flushData) throws IOException { String host = 
getAvailableHost(); if (null == host) { throw new IOException("None of the host in `load_url` could be connected."); @@ -55,12 +55,12 @@ public void doStreamLoad(DorisFlushTuple flushData) throws IOException { Map loadResult = doHttpPut(loadUrl, flushData.getLabel(), joinRows(flushData.getRows(), flushData.getBytes().intValue())); final String keyStatus = "Status"; if (null == loadResult || !loadResult.containsKey(keyStatus)) { - throw new IOException("Unable to flush data to doris: unknown result status."); + throw new IOException("Unable to flush data to StarRocks: unknown result status."); } LOG.debug(new StringBuilder("StreamLoad response:\n").append(JSON.toJSONString(loadResult)).toString()); if (loadResult.get(keyStatus).equals("Fail")) { throw new IOException( - new StringBuilder("Failed to flush data to doris.\n").append(JSON.toJSONString(loadResult)).toString() + new StringBuilder("Failed to flush data to StarRocks.\n").append(JSON.toJSONString(loadResult)).toString() ); } } @@ -94,9 +94,9 @@ private boolean tryHttpConnection(String host) { } private byte[] joinRows(List rows, int totalBytes) { - if (DorisWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { + if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { Map props = writerOptions.getLoadProps(); - byte[] lineDelimiter = DorisDelimiterParser.parse(String.valueOf(props.get("row_delimiter")), "\n").getBytes(StandardCharsets.UTF_8); + byte[] lineDelimiter = StarRocksDelimiterParser.parse(String.valueOf(props.get("row_delimiter")), "\n").getBytes(StandardCharsets.UTF_8); ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length); for (String row : rows) { bos.put(row.getBytes(StandardCharsets.UTF_8)); @@ -105,7 +105,7 @@ private byte[] joinRows(List rows, int totalBytes) { return bos.array(); } - if (DorisWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { + if (StarRocksWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { ByteBuffer bos = ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 
2 : rows.size() + 1)); bos.put("[".getBytes(StandardCharsets.UTF_8)); byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8); diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksWriterManager.java similarity index 70% rename from doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java rename to starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksWriterManager.java index d7e9ad33fe..4e53adebcb 100644 --- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/manager/DorisWriterManager.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksWriterManager.java @@ -1,4 +1,4 @@ -package com.dorisdb.connector.datax.plugin.writer.doriswriter.manager; +package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -9,26 +9,26 @@ import java.util.UUID; import java.util.concurrent.LinkedBlockingDeque; -import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; import com.google.common.base.Strings; -public class DorisWriterManager { +public class StarRocksWriterManager { - private static final Logger LOG = LoggerFactory.getLogger(DorisWriterManager.class); + private static final Logger LOG = LoggerFactory.getLogger(StarRocksWriterManager.class); - private final DorisStreamLoadVisitor dorisStreamLoadVisitor; - private final DorisWriterOptions writerOptions; + private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor; + private final StarRocksWriterOptions writerOptions; private final List buffer = new ArrayList<>(); private int batchCount = 0; private long batchSize = 0; private volatile boolean closed = false; private volatile Exception flushException; - private final LinkedBlockingDeque flushQueue; + private final LinkedBlockingDeque flushQueue; - public DorisWriterManager(DorisWriterOptions writerOptions) { + public StarRocksWriterManager(StarRocksWriterOptions writerOptions) { this.writerOptions = writerOptions; - this.dorisStreamLoadVisitor = new DorisStreamLoadVisitor(writerOptions); + this.starrocksStreamLoadVisitor = new StarRocksStreamLoadVisitor(writerOptions); flushQueue = new LinkedBlockingDeque<>(writerOptions.getFlushQueueLength()); this.startAsyncFlushing(); } @@ -41,11 +41,11 @@ public final synchronized void writeRecord(String record) throws IOException { batchSize += record.getBytes().length; if (batchCount >= writerOptions.getBatchRows() || batchSize >= writerOptions.getBatchSize()) { String label = createBatchLabel(); - LOG.debug(String.format("Doris buffer Sinking triggered: rows[%d] label[%s].", batchCount, label)); + LOG.debug(String.format("StarRocks buffer Sinking triggered: rows[%d] label[%s].", batchCount, label)); flush(label, false); } } catch (Exception e) { - throw new IOException("Writing records to Doris failed.", e); + throw new IOException("Writing records to StarRocks failed.", e); } } @@ -57,7 +57,7 @@ public synchronized void flush(String label, boolean waitUtilDone) throws Except } return; } - flushQueue.put(new DorisFlushTuple(label, batchSize, new ArrayList<>(buffer))); + flushQueue.put(new 
StarRocksFlushTuple(label, batchSize, new ArrayList<>(buffer)));
         if (waitUtilDone) {
             // wait the last flush
             waitAsyncFlushingDone();
@@ -72,10 +72,10 @@ public synchronized void close() {
             closed = true;
             try {
                 String label = createBatchLabel();
-                if (batchCount > 0) LOG.debug(String.format("Doris Sink is about to close: label[%s].", label));
+                if (batchCount > 0) LOG.debug(String.format("StarRocks Sink is about to close: label[%s].", label));
                 flush(label, true);
             } catch (Exception e) {
-                throw new RuntimeException("Writing records to Doris failed.", e);
+                throw new RuntimeException("Writing records to StarRocks failed.", e);
             }
         }
         checkFlushException();
@@ -105,24 +105,24 @@ public void run() {

     private void waitAsyncFlushingDone() throws InterruptedException {
         // wait previous flushings
         for (int i = 0; i <= writerOptions.getFlushQueueLength(); i++) {
-            flushQueue.put(new DorisFlushTuple("", 0l, null));
+            flushQueue.put(new StarRocksFlushTuple("", 0l, null));
         }
     }

     private void asyncFlush() throws Exception {
-        DorisFlushTuple flushData = flushQueue.take();
+        StarRocksFlushTuple flushData = flushQueue.take();
         if (Strings.isNullOrEmpty(flushData.getLabel())) {
             return;
         }
         LOG.debug(String.format("Async stream load: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel()));
         for (int i = 0; i <= writerOptions.getMaxRetries(); i++) {
             try {
-                // flush to Doris with stream load
-                dorisStreamLoadVisitor.doStreamLoad(flushData);
+                // flush to StarRocks with stream load
+                starrocksStreamLoadVisitor.doStreamLoad(flushData);
                 LOG.info(String.format("Async stream load finished: label[%s].", flushData.getLabel()));
                 break;
             } catch (Exception e) {
-                LOG.warn("Failed to flush batch data to doris, retry times = {}", i, e);
+                LOG.warn("Failed to flush batch data to StarRocks, retry times = {}", i, e);
                 if (i >= writerOptions.getMaxRetries()) {
                     throw new IOException(e);
                 }
@@ -138,7 +138,7 @@ private void asyncFlush() throws Exception {

     private void checkFlushException() {
         if (flushException != null) {
-            throw new RuntimeException("Writing records to Doris failed.", flushException);
+            throw new RuntimeException("Writing records to StarRocks failed.", flushException);
         }
     }
 }
diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksBaseSerializer.java
similarity index 76%
rename from doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java
rename to starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksBaseSerializer.java
index 9876b0f0c1..77d25f1235 100644
--- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisBaseSerializer.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksBaseSerializer.java
@@ -1,9 +1,9 @@
-package com.dorisdb.connector.datax.plugin.writer.doriswriter.row;
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.row;

 import com.alibaba.datax.common.element.Column;
 import com.alibaba.datax.common.element.Column.Type;

-public class DorisBaseSerializer {
+public class StarRocksBaseSerializer {

     protected String fieldConvertion(Column col) {
         if (null == col.getRawData()) {
diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksCsvSerializer.java
similarity index 69%
rename from doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java
rename to starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksCsvSerializer.java
index 862e0b7356..1366d57097 100644
--- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisCsvSerializer.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksCsvSerializer.java
@@ -1,4 +1,4 @@
-package com.dorisdb.connector.datax.plugin.writer.doriswriter.row;
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.row;

 import java.io.StringWriter;

@@ -6,14 +6,14 @@

 import com.google.common.base.Strings;

-public class DorisCsvSerializer extends DorisBaseSerializer implements DorisISerializer {
+public class StarRocksCsvSerializer extends StarRocksBaseSerializer implements StarRocksISerializer {

     private static final long serialVersionUID = 1L;

     private final String columnSeparator;

-    public DorisCsvSerializer(String sp) {
-        this.columnSeparator = DorisDelimiterParser.parse(sp, "\t");
+    public StarRocksCsvSerializer(String sp) {
+        this.columnSeparator = StarRocksDelimiterParser.parse(sp, "\t");
     }

     @Override
diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisDelimiterParser.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksDelimiterParser.java
similarity index 93%
rename from doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisDelimiterParser.java
rename to starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksDelimiterParser.java
index 3fd58fa5d9..04301e0f13 100644
--- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisDelimiterParser.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksDelimiterParser.java
@@ -1,10 +1,10 @@
-package com.dorisdb.connector.datax.plugin.writer.doriswriter.row;
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.row;

 import java.io.StringWriter;

 import com.google.common.base.Strings;

-public class DorisDelimiterParser {
+public class StarRocksDelimiterParser {

     private static final String HEX_STRING = "0123456789ABCDEF";

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksISerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksISerializer.java
new file mode 100644
index 0000000000..7bcb89739c
--- /dev/null
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksISerializer.java
@@ -0,0 +1,11 @@
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.row;
+
+import java.io.Serializable;
+
+import com.alibaba.datax.common.element.Record;
+
+public interface StarRocksISerializer extends Serializable {
+
+    String serialize(Record row);
+
+}
diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksJsonSerializer.java
similarity index 75%
rename from doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java
rename to starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksJsonSerializer.java
index 3f74ac587b..60faa1be63 100644
--- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/row/DorisJsonSerializer.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksJsonSerializer.java
@@ -1,4 +1,4 @@
-package com.dorisdb.connector.datax.plugin.writer.doriswriter.row;
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.row;

 import java.util.HashMap;
 import java.util.List;
@@ -7,13 +7,13 @@
 import com.alibaba.datax.common.element.Record;
 import com.alibaba.fastjson.JSON;

-public class DorisJsonSerializer extends DorisBaseSerializer implements DorisISerializer {
+public class StarRocksJsonSerializer extends StarRocksBaseSerializer implements StarRocksISerializer {

     private static final long serialVersionUID = 1L;

     private final List<String> fieldNames;

-    public DorisJsonSerializer(List<String> fieldNames) {
+    public StarRocksJsonSerializer(List<String> fieldNames) {
         this.fieldNames = fieldNames;
     }

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksSerializerFactory.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksSerializerFactory.java
new file mode 100644
index 0000000000..85f446cd93
--- /dev/null
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksSerializerFactory.java
@@ -0,0 +1,22 @@
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.row;
+
+import java.util.Map;
+
+import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions;
+
+public class StarRocksSerializerFactory {
+
+    private StarRocksSerializerFactory() {}
+
+    public static StarRocksISerializer createSerializer(StarRocksWriterOptions writerOptions) {
+        if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) {
+            Map<String, Object> props = writerOptions.getLoadProps();
+            return new StarRocksCsvSerializer(null == props || !props.containsKey("column_separator") ? null : String.valueOf(props.get("column_separator")));
+        }
+        if (StarRocksWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) {
+            return new StarRocksJsonSerializer(writerOptions.getColumns());
+        }
+        throw new RuntimeException("Failed to create row serializer, unsupported `format` from stream load properties.");
+    }
+
+}
diff --git a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/util/DorisWriterUtil.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/util/StarRocksWriterUtil.java
similarity index 79%
rename from doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/util/DorisWriterUtil.java
rename to starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/util/StarRocksWriterUtil.java
index 348e519c55..c3b5d8d1d0 100755
--- a/doriswriter/src/main/java/com/dorisdb/connector/datax/plugin/writer/doriswriter/util/DorisWriterUtil.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/util/StarRocksWriterUtil.java
@@ -1,11 +1,11 @@
-package com.dorisdb.connector.datax.plugin.writer.doriswriter.util;
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.util;

 import com.alibaba.datax.plugin.rdbms.util.DBUtil;
 import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
 import com.alibaba.datax.plugin.rdbms.util.RdbmsException;
 import com.alibaba.datax.plugin.rdbms.writer.Constant;
 import com.alibaba.druid.sql.parser.ParserException;
-import com.dorisdb.connector.datax.plugin.writer.doriswriter.DorisWriterOptions;
+import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions;
 import com.google.common.base.Strings;

 import org.slf4j.Logger;
@@ -15,10 +15,10 @@
 import java.sql.Statement;
 import java.util.*;

-public final class DorisWriterUtil {
-    private static final Logger LOG = LoggerFactory.getLogger(DorisWriterUtil.class);
+public final class StarRocksWriterUtil {
+    private static final Logger LOG = LoggerFactory.getLogger(StarRocksWriterUtil.class);

-    private DorisWriterUtil() {}
+    private StarRocksWriterUtil() {}

     public static List<String> renderPreOrPostSqls(List<String> preOrPostSqls, String tableName) {
         if (null == preOrPostSqls) {
@@ -49,10 +49,10 @@ public static void executeSqls(Connection conn, List<String> sqls) {
         }
     }

-    public static void preCheckPrePareSQL(DorisWriterOptions options) {
+    public static void preCheckPrePareSQL(StarRocksWriterOptions options) {
         String table = options.getTable();
         List<String> preSqls = options.getPreSqlList();
-        List<String> renderedPreSqls = DorisWriterUtil.renderPreOrPostSqls(preSqls, table);
+        List<String> renderedPreSqls = StarRocksWriterUtil.renderPreOrPostSqls(preSqls, table);
         if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) {
             LOG.info("Begin to preCheck preSqls:[{}].", String.join(";", renderedPreSqls));
             for (String sql : renderedPreSqls) {
@@ -65,10 +65,10 @@ public static void preCheckPrePareSQL(DorisWriterOptions options) {
         }
     }

-    public static void preCheckPostSQL(DorisWriterOptions options) {
+    public static void preCheckPostSQL(StarRocksWriterOptions options) {
         String table = options.getTable();
         List<String> postSqls = options.getPostSqlList();
-        List<String> renderedPostSqls = DorisWriterUtil.renderPreOrPostSqls(postSqls, table);
+        List<String> renderedPostSqls = StarRocksWriterUtil.renderPreOrPostSqls(postSqls, table);
         if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) {
             LOG.info("Begin to preCheck postSqls:[{}].", String.join(";", renderedPostSqls));
             for(String sql : renderedPostSqls) {
diff --git a/starrockswriter/src/main/resources/plugin.json b/starrockswriter/src/main/resources/plugin.json
new file mode 100755
index 0000000000..8edec1e01e
--- /dev/null
+++ b/starrockswriter/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+    "name": "starrockswriter",
+    "class": "com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriter",
+    "description": "useScene: prod. mechanism: StarRocksStreamLoad. warn: The more you know about the database, the less problems you encounter.",
+    "developer": "starrocks"
+}
\ No newline at end of file
diff --git a/doriswriter/src/main/resources/plugin_job_template.json b/starrockswriter/src/main/resources/plugin_job_template.json
similarity index 88%
rename from doriswriter/src/main/resources/plugin_job_template.json
rename to starrockswriter/src/main/resources/plugin_job_template.json
index ee1744c8f2..ca5c99d088 100644
--- a/doriswriter/src/main/resources/plugin_job_template.json
+++ b/starrockswriter/src/main/resources/plugin_job_template.json
@@ -1,5 +1,5 @@
 {
-    "name": "doriswriter",
+    "name": "starrockswriter",
     "parameter": {
         "username": "",
         "password": "",

From 835e5deb745b4dbdd9b161f35e7889ca2e0f0675 Mon Sep 17 00:00:00 2001
From: fariel
Date: Wed, 1 Sep 2021 16:51:01 +0800
Subject: [PATCH 21/50] fix renaming issue

---
 .../starrockswriter/StarRocksWriter.java      | 141 ++++++++++++++
 .../StarRocksWriterOptions.java               | 146 +++++++++++++++
 .../manager/StarRocksFlushTuple.java          |  20 ++
 .../manager/StarRocksStreamLoadVisitor.java   | 175 ++++++++++++++++++
 .../manager/StarRocksWriterManager.java       | 144 ++++++++++++++
 .../row/StarRocksBaseSerializer.java          |  18 ++
 .../row/StarRocksCsvSerializer.java           |  32 ++++
 .../row/StarRocksDelimiterParser.java         |  55 ++++++
 .../row/StarRocksISerializer.java             |  11 ++
 .../row/StarRocksJsonSerializer.java          |  34 ++++
 .../row/StarRocksSerializerFactory.java       |  22 +++
 .../util/StarRocksWriterUtil.java             |  83 +++++++++
 12 files changed, 881 insertions(+)
 create mode 100755 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksCsvSerializer.java
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksDelimiterParser.java
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksISerializer.java
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksJsonSerializer.java
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksSerializerFactory.java
 create mode 100755 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java
new file mode 100755
index 0000000000..666a99d962
--- /dev/null
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java
@@ -0,0 +1,141 @@
+package com.starrocks.connector.datax.plugin.writer.starrockswriter;
+
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
+import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import com.starrocks.connector.datax.plugin.writer.starrockswriter.manager.StarRocksWriterManager;
+import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksISerializer;
+import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksSerializerFactory;
+import com.starrocks.connector.datax.plugin.writer.starrockswriter.util.StarRocksWriterUtil;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+import java.util.ArrayList;
+import java.util.List;
+
+public class StarRocksWriter extends Writer {
+
+    public static class Job extends Writer.Job {
+
+        private static final Logger LOG = LoggerFactory.getLogger(Job.class);
+        private Configuration originalConfig = null;
+        private StarRocksWriterOptions options;
+
+        @Override
+        public void init() {
+            this.originalConfig = super.getPluginJobConf();
+            options = new StarRocksWriterOptions(super.getPluginJobConf());
+            options.doPretreatment();
+        }
+
+        @Override
+        public void preCheck(){
+            this.init();
+            StarRocksWriterUtil.preCheckPrePareSQL(options);
+            StarRocksWriterUtil.preCheckPostSQL(options);
+        }
+
+        @Override
+        public void prepare() {
+            String username = options.getUsername();
+            String password = options.getPassword();
+            String jdbcUrl = options.getJdbcUrl();
+            List<String> renderedPreSqls = StarRocksWriterUtil.renderPreOrPostSqls(options.getPreSqlList(), options.getTable());
+            if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) {
+                Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password);
+                LOG.info("Begin to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPreSqls), jdbcUrl);
+                StarRocksWriterUtil.executeSqls(conn, renderedPreSqls);
+                DBUtil.closeDBResources(null, null, conn);
+            }
+        }
+
+        @Override
+        public List<Configuration> split(int mandatoryNumber) {
+            List<Configuration> configurations = new ArrayList<>(mandatoryNumber);
+            for (int i = 0; i < mandatoryNumber; i++) {
+                configurations.add(originalConfig);
+            }
+            return configurations;
+        }
+
+        @Override
+        public void post() {
+            String username = options.getUsername();
+            String password = options.getPassword();
+            String jdbcUrl = options.getJdbcUrl();
+            List<String> renderedPostSqls = StarRocksWriterUtil.renderPreOrPostSqls(options.getPostSqlList(), options.getTable());
+            if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) {
+                Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password);
+                LOG.info("Begin to execute postSqls:[{}]. context info:{}.", String.join(";", renderedPostSqls), jdbcUrl);
+                StarRocksWriterUtil.executeSqls(conn, renderedPostSqls);
+                DBUtil.closeDBResources(null, null, conn);
+            }
+        }
+
+        @Override
+        public void destroy() {
+        }
+
+    }
+
+    public static class Task extends Writer.Task {
+        private StarRocksWriterManager writerManager;
+        private StarRocksWriterOptions options;
+        private StarRocksISerializer rowSerializer;
+
+        @Override
+        public void init() {
+            options = new StarRocksWriterOptions(super.getPluginJobConf());
+            writerManager = new StarRocksWriterManager(options);
+            rowSerializer = StarRocksSerializerFactory.createSerializer(options);
+        }
+
+        @Override
+        public void prepare() {
+        }
+
+        public void startWrite(RecordReceiver recordReceiver) {
+            try {
+                Record record;
+                while ((record = recordReceiver.getFromReader()) != null) {
+                    if (record.getColumnNumber() != options.getColumns().size()) {
+                        throw DataXException
+                                .asDataXException(
+                                        DBUtilErrorCode.CONF_ERROR,
+                                        String.format(
+                                                "Column configuration error: the number of fields read from the source (%s) does not match the number of fields to be written to the destination table (%s). Please check your configuration and try again.",
+                                                record.getColumnNumber(),
+                                                options.getColumns().size()));
+                    }
+                    writerManager.writeRecord(rowSerializer.serialize(record));
+                }
+            } catch (Exception e) {
+                throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
+            }
+        }
+
+        @Override
+        public void post() {
+            try {
+                writerManager.close();
+            } catch (Exception e) {
+                throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
+            }
+        }
+
+        @Override
+        public void destroy() {}
+
+        @Override
+        public boolean supportFailOver(){
+            return false;
+        }
+    }
+}
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
new file mode 100644
index 0000000000..5180512f59
--- /dev/null
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
@@ -0,0 +1,146 @@
+package com.starrocks.connector.datax.plugin.writer.starrockswriter;
+
+import java.io.Serializable;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
+
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+public class StarRocksWriterOptions implements Serializable {
+
+    private static final long serialVersionUID = 1l;
+    private static final long KILO_BYTES_SCALE = 1024l;
+    private static final long MEGA_BYTES_SCALE = KILO_BYTES_SCALE * KILO_BYTES_SCALE;
+    private static final int MAX_RETRIES = 1;
+    private static final int BATCH_ROWS = 500000;
+    private static final long BATCH_BYTES = 90 * MEGA_BYTES_SCALE;
+
+    private static final String KEY_LOAD_PROPS_FORMAT = "format";
+    public enum StreamLoadFormat {
+        CSV, JSON;
+    }
+
+    private static final String KEY_USERNAME = "username";
+    private static final String KEY_PASSWORD = "password";
+    private static final String KEY_DATABASE = "database";
+    private static final String KEY_TABLE = "table";
+    private static final String KEY_COLUMN = "column";
+    private static final String KEY_PRE_SQL = "preSql";
+    private static final String KEY_POST_SQL = "postSql";
+    private static final String KEY_JDBC_URL = "jdbcUrl";
+    private static final String KEY_MAX_BATCH_ROWS = "maxBatchRows";
+    private static final String KEY_MAX_BATCH_SIZE = "maxBatchSize";
+    private static final String KEY_LOAD_URL = "loadUrl";
+    private static final String KEY_FLUSH_QUEUE_LENGTH = "flushQueueLength";
+    private static final String KEY_LOAD_PROPS = "loadProps";
+
+    private final Configuration options;
+
+    public StarRocksWriterOptions(Configuration options) {
+        this.options = options;
+    }
+
+    public void doPretreatment() {
+        validateRequired();
+        validateStreamLoadUrl();
+    }
+
+    public String getJdbcUrl() {
+        return options.getString(KEY_JDBC_URL);
+    }
+
+    public String getDatabase() {
+        return options.getString(KEY_DATABASE);
+    }
+
+    public String getTable() {
+        return options.getString(KEY_TABLE);
+    }
+
+    public String getUsername() {
+        return options.getString(KEY_USERNAME);
+    }
+
+    public String getPassword() {
+        return options.getString(KEY_PASSWORD);
+    }
+
+    public List<String> getLoadUrlList() {
+        return options.getList(KEY_LOAD_URL, String.class);
+    }
+
+    public List<String> getColumns() {
+        return options.getList(KEY_COLUMN, String.class).stream().map(str -> str.replace("`", "")).collect(Collectors.toList());
+    }
+
+    public List<String> getPreSqlList() {
+        return options.getList(KEY_PRE_SQL, String.class);
+    }
+
+    public List<String> getPostSqlList() {
+        return options.getList(KEY_POST_SQL, String.class);
+    }
+
+    public Map<String, Object> getLoadProps() {
+        return options.getMap(KEY_LOAD_PROPS);
+    }
+
+    public int getMaxRetries() {
+        return MAX_RETRIES;
+    }
+
+    public int getBatchRows() {
+        Integer rows = options.getInt(KEY_MAX_BATCH_ROWS);
+        return null == rows ? BATCH_ROWS : rows;
+    }
+
+    public long getBatchSize() {
+        Long size = options.getLong(KEY_MAX_BATCH_SIZE);
+        return null == size ? BATCH_BYTES : size;
+    }
+
+    public int getFlushQueueLength() {
+        Integer len = options.getInt(KEY_FLUSH_QUEUE_LENGTH);
+        return null == len ? 1 : len;
+    }
+
+    public StreamLoadFormat getStreamLoadFormat() {
+        Map<String, Object> loadProps = getLoadProps();
+        if (null == loadProps) {
+            return StreamLoadFormat.CSV;
+        }
+        if (loadProps.containsKey(KEY_LOAD_PROPS_FORMAT)
+                && StreamLoadFormat.JSON.name().equalsIgnoreCase(String.valueOf(loadProps.get(KEY_LOAD_PROPS_FORMAT)))) {
+            return StreamLoadFormat.JSON;
+        }
+        return StreamLoadFormat.CSV;
+    }
+
+    private void validateStreamLoadUrl() {
+        List<String> urlList = getLoadUrlList();
+        for (String host : urlList) {
+            if (host.split(":").length < 2) {
+                throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR,
+                    "The format of `loadUrl` is incorrect. Please specify it as `fe_ip:fe_http_port;fe_ip:fe_http_port`.");
+            }
+        }
+    }
+
+    private void validateRequired() {
+        final String[] requiredOptionKeys = new String[]{
+            KEY_USERNAME,
+            KEY_PASSWORD,
+            KEY_DATABASE,
+            KEY_TABLE,
+            KEY_COLUMN,
+            KEY_LOAD_URL
+        };
+        for (String optionKey : requiredOptionKeys) {
+            options.getNecessaryValue(optionKey, DBUtilErrorCode.REQUIRED_VALUE);
+        }
+    }
+}
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java
new file mode 100644
index 0000000000..cd8c663b18
--- /dev/null
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java
@@ -0,0 +1,20 @@
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager;
+
+import java.util.List;
+
+public class StarRocksFlushTuple {
+
+    private String label;
+    private Long bytes;
+    private List<String> rows;
+
+    public StarRocksFlushTuple(String label, Long bytes, List<String> rows) {
+        this.label = label;
+        this.bytes = bytes;
+        this.rows = rows;
+    }
+
+    public String getLabel() { return label; }
+    public Long getBytes() { return bytes; }
+    public List<String> getRows() { return rows; }
+}
\ No newline at end of file
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
new file mode 100644
index 0000000000..15e93e87e1
--- /dev/null
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
@@ -0,0 +1,175 @@
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager;
+
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+
+import com.alibaba.fastjson.JSON;
+import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions;
+import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksDelimiterParser;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.http.HttpEntity;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpPut;
+import org.apache.http.entity.ByteArrayEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.DefaultRedirectStrategy;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Map;
+
+
+public class StarRocksStreamLoadVisitor {
+
+    private static final Logger LOG = LoggerFactory.getLogger(StarRocksStreamLoadVisitor.class);
+
+    private final StarRocksWriterOptions writerOptions;
+    private int pos;
+
+    public StarRocksStreamLoadVisitor(StarRocksWriterOptions writerOptions) {
+        this.writerOptions = writerOptions;
+    }
+
+    public void doStreamLoad(StarRocksFlushTuple flushData) throws IOException {
+        String host = getAvailableHost();
+        if (null == host) {
+            throw new IOException("None of the hosts in `load_url` could be connected.");
+        }
+        String loadUrl = new StringBuilder(host)
+            .append("/api/")
+            .append(writerOptions.getDatabase())
+            .append("/")
+            .append(writerOptions.getTable())
+            .append("/_stream_load")
+            .toString();
+        LOG.debug(String.format("Start to join batch data: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel()));
+        Map<String, Object> loadResult = doHttpPut(loadUrl, flushData.getLabel(), joinRows(flushData.getRows(), flushData.getBytes().intValue()));
+        final String keyStatus = "Status";
+        if (null == loadResult || !loadResult.containsKey(keyStatus)) {
+            throw new IOException("Unable to flush data to StarRocks: unknown result status.");
+        }
+        LOG.debug(new StringBuilder("StreamLoad response:\n").append(JSON.toJSONString(loadResult)).toString());
+        if (loadResult.get(keyStatus).equals("Fail")) {
+            throw new IOException(
+                new StringBuilder("Failed to flush data to StarRocks.\n").append(JSON.toJSONString(loadResult)).toString()
+            );
+        }
+    }
+
+    private String getAvailableHost() {
+        List<String> hostList = writerOptions.getLoadUrlList();
+        if (pos >= hostList.size()) {
+            pos = 0;
+        }
+        for (; pos < hostList.size(); pos++) {
+            String host = new StringBuilder("http://").append(hostList.get(pos)).toString();
+            if (tryHttpConnection(host)) {
+                return host;
+            }
+        }
+        return null;
+    }
+
+    private boolean tryHttpConnection(String host) {
+        try {
+            URL url = new URL(host);
+            HttpURLConnection co = (HttpURLConnection) url.openConnection();
+            co.setConnectTimeout(1000);
+            co.connect();
+            co.disconnect();
+            return true;
+        } catch (Exception e1) {
+            LOG.warn("Failed to connect to address:{}", host, e1);
+            return false;
+        }
+    }
+
+    private byte[] joinRows(List<String> rows, int totalBytes) {
+        if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) {
+            Map<String, Object> props = writerOptions.getLoadProps();
+            byte[] lineDelimiter = (props.containsKey("row_delimiter") ? StarRocksDelimiterParser.parse(String.valueOf(props.get("row_delimiter")), "\n") : "\n").getBytes(StandardCharsets.UTF_8);
+            ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length);
+            for (String row : rows) {
+                bos.put(row.getBytes(StandardCharsets.UTF_8));
+                bos.put(lineDelimiter);
+            }
+            return bos.array();
+        }
+
+        if (StarRocksWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) {
+            ByteBuffer bos = ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 2 : rows.size() + 1));
+            bos.put("[".getBytes(StandardCharsets.UTF_8));
+            byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8);
+            boolean isFirstElement = true;
+            for (String row : rows) {
+                if (!isFirstElement) {
+                    bos.put(jsonDelimiter);
+                }
+                bos.put(row.getBytes(StandardCharsets.UTF_8));
+                isFirstElement = false;
+            }
+            bos.put("]".getBytes(StandardCharsets.UTF_8));
+            return bos.array();
+        }
+        throw new RuntimeException("Failed to join rows data, unsupported `format` from stream load properties.");
+    }
+
+    @SuppressWarnings("unchecked")
+    private Map<String, Object> doHttpPut(String loadUrl, String label, byte[] data) throws IOException {
+        LOG.info(String.format("Executing stream load to: '%s', size: '%s'", loadUrl, data.length));
+        final HttpClientBuilder httpClientBuilder = HttpClients.custom()
+            .setRedirectStrategy(new DefaultRedirectStrategy() {
+                @Override
+                protected boolean isRedirectable(String method) {
+                    return true;
+                }
+            });
+        try (CloseableHttpClient httpclient = httpClientBuilder.build()) {
+            HttpPut httpPut = new HttpPut(loadUrl);
+            List<String> cols = writerOptions.getColumns();
+            if (null != cols && !cols.isEmpty()) {
+                httpPut.setHeader("columns", String.join(",", cols));
+            }
+            if (null != writerOptions.getLoadProps()) {
+                for (Map.Entry<String, Object> entry : writerOptions.getLoadProps().entrySet()) {
+                    httpPut.setHeader(entry.getKey(), String.valueOf(entry.getValue()));
+                }
+            }
+            httpPut.setHeader("Expect", "100-continue");
+            httpPut.setHeader("label", label);
+            httpPut.setHeader("Content-Type", "application/x-www-form-urlencoded");
+            httpPut.setHeader("Authorization", getBasicAuthHeader(writerOptions.getUsername(), writerOptions.getPassword()));
+            httpPut.setEntity(new ByteArrayEntity(data));
+            httpPut.setConfig(RequestConfig.custom().setRedirectsEnabled(true).build());
+            try (CloseableHttpResponse resp = httpclient.execute(httpPut)) {
+                int code = resp.getStatusLine().getStatusCode();
+                if (200 != code) {
+                    LOG.warn("Request failed with code:{}", code);
+                    return null;
+                }
+                HttpEntity respEntity = resp.getEntity();
+                if (null == respEntity) {
+                    LOG.warn("Request failed with empty response.");
+                    return null;
+                }
+                return (Map<String, Object>) JSON.parse(EntityUtils.toString(respEntity));
+            }
+        }
+    }
+
+    private String getBasicAuthHeader(String username, String password) {
+        String auth = username + ":" + password;
+        byte[] encodedAuth = Base64.encodeBase64(auth.getBytes());
+        return new StringBuilder("Basic ").append(new String(encodedAuth)).toString();
+    }
+
+}
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
new file mode 100644
index 0000000000..e523442597
--- /dev/null
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
@@ -0,0 +1,144 @@
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.LinkedBlockingDeque;
+
+import com.google.common.base.Strings;
+import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions;
+
+public class StarRocksWriterManager {
+
+    private static final Logger LOG = LoggerFactory.getLogger(StarRocksWriterManager.class);
+
+    private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor;
+    private final StarRocksWriterOptions writerOptions;
+
+    private final List<String> buffer = new ArrayList<>();
+    private int batchCount = 0;
+    private long batchSize = 0;
+    private volatile boolean closed = false;
+    private volatile Exception flushException;
+    private final LinkedBlockingDeque<StarRocksFlushTuple> flushQueue;
+
+    public StarRocksWriterManager(StarRocksWriterOptions writerOptions) {
+        this.writerOptions = writerOptions;
+        this.starrocksStreamLoadVisitor = new StarRocksStreamLoadVisitor(writerOptions);
+        flushQueue = new LinkedBlockingDeque<>(writerOptions.getFlushQueueLength());
+        this.startAsyncFlushing();
+    }
+
+    public final synchronized void writeRecord(String record) throws IOException {
+        checkFlushException();
+        try {
+            buffer.add(record);
+            batchCount++;
+            batchSize += record.getBytes().length;
+            if (batchCount >= writerOptions.getBatchRows() || batchSize >= writerOptions.getBatchSize()) {
+                String label = createBatchLabel();
+                LOG.debug(String.format("StarRocks buffer Sinking triggered: rows[%d] label[%s].", batchCount, label));
+                flush(label, false);
+            }
+        } catch (Exception e) {
+            throw new IOException("Writing records to StarRocks failed.", e);
+        }
+    }
+
+    public synchronized void flush(String label, boolean waitUtilDone) throws Exception {
+        checkFlushException();
+        if (batchCount == 0) {
+            if (waitUtilDone) {
+                waitAsyncFlushingDone();
+            }
+            return;
+        }
+        flushQueue.put(new StarRocksFlushTuple(label, batchSize, new ArrayList<>(buffer)));
+        if (waitUtilDone) {
+            // wait the last flush
+            waitAsyncFlushingDone();
+        }
+        buffer.clear();
+        batchCount = 0;
+        batchSize = 0;
+    }
+
+    public synchronized void close() {
+        if (!closed) {
+            closed = true;
+            try {
+                String label = createBatchLabel();
+                if (batchCount > 0) LOG.debug(String.format("StarRocks Sink is about to close: label[%s].", label));
+                flush(label, true);
+            } catch (Exception e) {
+                throw new RuntimeException("Writing records to StarRocks failed.", e);
+            }
+        }
+        checkFlushException();
+    }
+
+    public String createBatchLabel() {
+        return UUID.randomUUID().toString();
+    }
+
+    private void startAsyncFlushing() {
+        // start flush thread
+        Thread flushThread = new Thread(new Runnable(){
+            public void run() {
+                while(true) {
+                    try {
+                        asyncFlush();
+                    } catch (Exception e) {
+                        flushException = e;
+                    }
+                }
+            }
+        });
+        flushThread.setDaemon(true);
+        flushThread.start();
+    }
+
+    private void waitAsyncFlushingDone() throws InterruptedException {
+        // wait previous flushings
+        for (int i = 0; i <= writerOptions.getFlushQueueLength(); i++) {
+            flushQueue.put(new StarRocksFlushTuple("", 0l, null));
+        }
+    }
+
+    private void asyncFlush() throws Exception {
+        StarRocksFlushTuple flushData = flushQueue.take();
+        if (Strings.isNullOrEmpty(flushData.getLabel())) {
+            return;
+        }
+        LOG.debug(String.format("Async stream load: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel()));
+        for (int i = 0; i <= writerOptions.getMaxRetries(); i++) {
+            try {
+                // flush to StarRocks with stream load
+                starrocksStreamLoadVisitor.doStreamLoad(flushData);
+                LOG.info(String.format("Async stream load finished: label[%s].", flushData.getLabel()));
+                break;
+            } catch (Exception e) {
+                LOG.warn("Failed to flush batch data to StarRocks, retry times = {}", i, e);
+                if (i >= writerOptions.getMaxRetries()) {
+                    throw new IOException(e);
+                }
+                try {
+                    Thread.sleep(1000l * (i + 1));
+                } catch (InterruptedException ex) {
+                    Thread.currentThread().interrupt();
+                    throw new IOException("Unable to flush, interrupted while doing another attempt", e);
+                }
+            }
+        }
+    }
+
+    private void checkFlushException() {
+        if (flushException != null) {
+            throw new RuntimeException("Writing records to StarRocks failed.", flushException);
+        }
+    }
+}
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java
new file mode 100644
index 0000000000..77d25f1235
--- /dev/null
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java
@@ -0,0 +1,18 @@
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.row;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Column.Type;
+
+public class StarRocksBaseSerializer {
+
+    protected String fieldConvertion(Column col) {
+        if (null == col.getRawData()) {
+            return null;
+        }
+        if (Type.BOOL == col.getType()) {
+            return String.valueOf(col.asLong());
+        }
+        return col.asString();
+    }
+
+}
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksCsvSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksCsvSerializer.java
new file mode 100644
index 0000000000..1366d57097
--- /dev/null
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksCsvSerializer.java
@@ -0,0 +1,32 @@
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.row;
+
+import java.io.StringWriter;
+
+import com.alibaba.datax.common.element.Record;
+
+import com.google.common.base.Strings;
+
+public class StarRocksCsvSerializer extends StarRocksBaseSerializer implements StarRocksISerializer {
+
+    private static final long serialVersionUID = 1L;
+
+    private final String columnSeparator;
+
+    public StarRocksCsvSerializer(String sp) {
+        this.columnSeparator = StarRocksDelimiterParser.parse(sp, "\t");
+    }
+
+    @Override
+    public String serialize(Record row) {
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < row.getColumnNumber(); i++) {
+            String value = fieldConvertion(row.getColumn(i));
+            sb.append(null == value ? "\\N" : value);
"\\N" : value); + if (i < row.getColumnNumber() - 1) { + sb.append(columnSeparator); + } + } + return sb.toString(); + } + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksDelimiterParser.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksDelimiterParser.java new file mode 100644 index 0000000000..04301e0f13 --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksDelimiterParser.java @@ -0,0 +1,55 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; + +import java.io.StringWriter; + +import com.google.common.base.Strings; + +public class StarRocksDelimiterParser { + + private static final String HEX_STRING = "0123456789ABCDEF"; + + public static String parse(String sp, String dSp) throws RuntimeException { + if (Strings.isNullOrEmpty(sp)) { + return dSp; + } + if (!sp.toUpperCase().startsWith("\\X")) { + return sp; + } + String hexStr = sp.substring(2); + // check hex str + if (hexStr.isEmpty()) { + throw new RuntimeException("Failed to parse delimiter: `Hex str is empty`"); + } + if (hexStr.length() % 2 != 0) { + throw new RuntimeException("Failed to parse delimiter: `Hex str length error`"); + } + for (char hexChar : hexStr.toUpperCase().toCharArray()) { + if (HEX_STRING.indexOf(hexChar) == -1) { + throw new RuntimeException("Failed to parse delimiter: `Hex str format error`"); + } + } + // transform to separator + StringWriter writer = new StringWriter(); + for (byte b : hexStrToBytes(hexStr)) { + writer.append((char) b); + } + return writer.toString(); + } + + private static byte[] hexStrToBytes(String hexStr) { + String upperHexStr = hexStr.toUpperCase(); + int length = upperHexStr.length() / 2; + char[] hexChars = upperHexStr.toCharArray(); + byte[] bytes = new byte[length]; + for (int i = 0; i < length; i++) { + int pos = i * 2; + bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1])); + } + return bytes; + } + + private static byte charToByte(char c) { + return (byte) HEX_STRING.indexOf(c); + } + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksISerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksISerializer.java new file mode 100644 index 0000000000..7bcb89739c --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksISerializer.java @@ -0,0 +1,11 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; + +import java.io.Serializable; + +import com.alibaba.datax.common.element.Record; + +public interface StarRocksISerializer extends Serializable { + + String serialize(Record row); + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksJsonSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksJsonSerializer.java new file mode 100644 index 0000000000..60faa1be63 --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksJsonSerializer.java @@ -0,0 +1,34 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import 
com.alibaba.datax.common.element.Record; +import com.alibaba.fastjson.JSON; + +public class StarRocksJsonSerializer extends StarRocksBaseSerializer implements StarRocksISerializer { + + private static final long serialVersionUID = 1L; + + private final List fieldNames; + + public StarRocksJsonSerializer(List fieldNames) { + this.fieldNames = fieldNames; + } + + @Override + public String serialize(Record row) { + if (null == fieldNames) { + return ""; + } + Map rowMap = new HashMap<>(fieldNames.size()); + int idx = 0; + for (String fieldName : fieldNames) { + rowMap.put(fieldName, fieldConvertion(row.getColumn(idx))); + idx++; + } + return JSON.toJSONString(rowMap); + } + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksSerializerFactory.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksSerializerFactory.java new file mode 100644 index 0000000000..85f446cd93 --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksSerializerFactory.java @@ -0,0 +1,22 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; + +import java.util.Map; + +import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; + +public class StarRocksSerializerFactory { + + private StarRocksSerializerFactory() {} + + public static StarRocksISerializer createSerializer(StarRocksWriterOptions writerOptions) { + if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { + Map props = writerOptions.getLoadProps(); + return new StarRocksCsvSerializer(null == props || !props.containsKey("column_separator") ? null : String.valueOf(props.get("column_separator"))); + } + if (StarRocksWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { + return new StarRocksJsonSerializer(writerOptions.getColumns()); + } + throw new RuntimeException("Failed to create row serializer, unsupported `format` from stream load properties."); + } + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java new file mode 100755 index 0000000000..c3b5d8d1d0 --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java @@ -0,0 +1,83 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.util; + +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.RdbmsException; +import com.alibaba.datax.plugin.rdbms.writer.Constant; +import com.alibaba.druid.sql.parser.ParserException; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; +import com.google.common.base.Strings; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.Statement; +import java.util.*; + +public final class StarRocksWriterUtil { + private static final Logger LOG = LoggerFactory.getLogger(StarRocksWriterUtil.class); + + private StarRocksWriterUtil() {} + + public static List renderPreOrPostSqls(List preOrPostSqls, String tableName) { + if (null == preOrPostSqls) { + return Collections.emptyList(); + } + List 
renderedSqls = new ArrayList<>(); + for (String sql : preOrPostSqls) { + if (!Strings.isNullOrEmpty(sql)) { + renderedSqls.add(sql.replace(Constant.TABLE_NAME_PLACEHOLDER, tableName)); + } + } + return renderedSqls; + } + + public static void executeSqls(Connection conn, List sqls) { + Statement stmt = null; + String currentSql = null; + try { + stmt = conn.createStatement(); + for (String sql : sqls) { + currentSql = sql; + DBUtil.executeSqlWithoutResultSet(stmt, sql); + } + } catch (Exception e) { + throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null); + } finally { + DBUtil.closeDBResources(null, stmt, null); + } + } + + public static void preCheckPrePareSQL(StarRocksWriterOptions options) { + String table = options.getTable(); + List preSqls = options.getPreSqlList(); + List renderedPreSqls = StarRocksWriterUtil.renderPreOrPostSqls(preSqls, table); + if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { + LOG.info("Begin to preCheck preSqls:[{}].", String.join(";", renderedPreSqls)); + for (String sql : renderedPreSqls) { + try { + DBUtil.sqlValid(sql, DataBaseType.MySql); + } catch (ParserException e) { + throw RdbmsException.asPreSQLParserException(DataBaseType.MySql,e,sql); + } + } + } + } + + public static void preCheckPostSQL(StarRocksWriterOptions options) { + String table = options.getTable(); + List postSqls = options.getPostSqlList(); + List renderedPostSqls = StarRocksWriterUtil.renderPreOrPostSqls(postSqls, table); + if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { + LOG.info("Begin to preCheck postSqls:[{}].", String.join(";", renderedPostSqls)); + for(String sql : renderedPostSqls) { + try { + DBUtil.sqlValid(sql, DataBaseType.MySql); + } catch (ParserException e){ + throw RdbmsException.asPostSQLParserException(DataBaseType.MySql,e,sql); + } + } + } + } +} From 0582da63a51b63768c67bd75a5096dec53e4f788 Mon Sep 17 00:00:00 2001 From: fariel Date: Wed, 1 Sep 2021 17:08:12 +0800 Subject: [PATCH 22/50] remove doriswriter --- .../writer/doriswriter/StarRocksWriter.java | 141 -------------- .../doriswriter/StarRocksWriterOptions.java | 146 --------------- .../manager/StarRocksFlushTuple.java | 20 -- .../manager/StarRocksStreamLoadVisitor.java | 175 ------------------ .../manager/StarRocksWriterManager.java | 144 -------------- .../row/StarRocksBaseSerializer.java | 18 -- .../row/StarRocksCsvSerializer.java | 32 ---- .../row/StarRocksDelimiterParser.java | 55 ------ .../doriswriter/row/StarRocksISerializer.java | 11 -- .../row/StarRocksJsonSerializer.java | 34 ---- .../row/StarRocksSerializerFactory.java | 22 --- .../doriswriter/util/StarRocksWriterUtil.java | 83 --------- 12 files changed, 881 deletions(-) delete mode 100755 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriter.java delete mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriterOptions.java delete mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksFlushTuple.java delete mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksStreamLoadVisitor.java delete mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksWriterManager.java delete mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksBaseSerializer.java delete mode 
 delete mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksDelimiterParser.java
 delete mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksISerializer.java
 delete mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksJsonSerializer.java
 delete mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksSerializerFactory.java
 delete mode 100755 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/util/StarRocksWriterUtil.java

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriter.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriter.java
deleted file mode 100755
index 666a99d962..0000000000
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriter.java
+++ /dev/null
@@ -1,141 +0,0 @@
-package com.starrocks.connector.datax.plugin.writer.starrockswriter;
-
-import com.alibaba.datax.common.element.Record;
-import com.alibaba.datax.common.exception.DataXException;
-import com.alibaba.datax.common.plugin.RecordReceiver;
-import com.alibaba.datax.common.spi.Writer;
-import com.alibaba.datax.common.util.Configuration;
-import com.alibaba.datax.plugin.rdbms.util.DBUtil;
-import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
-import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
-import com.starrocks.connector.datax.plugin.writer.starrockswriter.manager.StarRocksWriterManager;
-import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksISerializer;
-import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksSerializerFactory;
-import com.starrocks.connector.datax.plugin.writer.starrockswriter.util.StarRocksWriterUtil;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.sql.Connection;
-import java.util.ArrayList;
-import java.util.List;
-
-public class StarRocksWriter extends Writer {
-
-    public static class Job extends Writer.Job {
-
-        private static final Logger LOG = LoggerFactory.getLogger(Job.class);
-        private Configuration originalConfig = null;
-        private StarRocksWriterOptions options;
-
-        @Override
-        public void init() {
-            this.originalConfig = super.getPluginJobConf();
-            options = new StarRocksWriterOptions(super.getPluginJobConf());
-            options.doPretreatment();
-        }
-
-        @Override
-        public void preCheck(){
-            this.init();
-            StarRocksWriterUtil.preCheckPrePareSQL(options);
-            StarRocksWriterUtil.preCheckPostSQL(options);
-        }
-
-        @Override
-        public void prepare() {
-            String username = options.getUsername();
-            String password = options.getPassword();
-            String jdbcUrl = options.getJdbcUrl();
-            List<String> renderedPreSqls = StarRocksWriterUtil.renderPreOrPostSqls(options.getPreSqlList(), options.getTable());
-            if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) {
-                Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password);
-                LOG.info("Begin to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPreSqls), jdbcUrl);
-                StarRocksWriterUtil.executeSqls(conn, renderedPreSqls);
-                DBUtil.closeDBResources(null, null, conn);
-            }
-        }
-
-        @Override
-        public List<Configuration> split(int mandatoryNumber) {
-            List<Configuration> configurations = new ArrayList<>(mandatoryNumber);
-            for (int i = 0; i < mandatoryNumber; i++) {
-                configurations.add(originalConfig);
-            }
-            return configurations;
-        }
-
-        @Override
-        public void post() {
-            String username = options.getUsername();
-            String password = options.getPassword();
-            String jdbcUrl = options.getJdbcUrl();
-            List<String> renderedPostSqls = StarRocksWriterUtil.renderPreOrPostSqls(options.getPostSqlList(), options.getTable());
-            if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) {
-                Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password);
-                LOG.info("Begin to execute postSqls:[{}]. context info:{}.", String.join(";", renderedPostSqls), jdbcUrl);
-                StarRocksWriterUtil.executeSqls(conn, renderedPostSqls);
-                DBUtil.closeDBResources(null, null, conn);
-            }
-        }
-
-        @Override
-        public void destroy() {
-        }
-
-    }
-
-    public static class Task extends Writer.Task {
-        private StarRocksWriterManager writerManager;
-        private StarRocksWriterOptions options;
-        private StarRocksISerializer rowSerializer;
-
-        @Override
-        public void init() {
-            options = new StarRocksWriterOptions(super.getPluginJobConf());
-            writerManager = new StarRocksWriterManager(options);
-            rowSerializer = StarRocksSerializerFactory.createSerializer(options);
-        }
-
-        @Override
-        public void prepare() {
-        }
-
-        public void startWrite(RecordReceiver recordReceiver) {
-            try {
-                Record record;
-                while ((record = recordReceiver.getFromReader()) != null) {
-                    if (record.getColumnNumber() != options.getColumns().size()) {
-                        throw DataXException
-                                .asDataXException(
-                                        DBUtilErrorCode.CONF_ERROR,
-                                        String.format(
-                                                "Column configuration error: the number of fields read from the source (%s) does not match the number of fields to be written to the destination table (%s). Please check your configuration and try again.",
-                                                record.getColumnNumber(),
-                                                options.getColumns().size()));
-                    }
-                    writerManager.writeRecord(rowSerializer.serialize(record));
-                }
-            } catch (Exception e) {
-                throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
-            }
-        }
-
-        @Override
-        public void post() {
-            try {
-                writerManager.close();
-            } catch (Exception e) {
-                throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
-            }
-        }
-
-        @Override
-        public void destroy() {}
-
-        @Override
-        public boolean supportFailOver(){
-            return false;
-        }
-    }
-}
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriterOptions.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriterOptions.java
deleted file mode 100644
index 5180512f59..0000000000
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/StarRocksWriterOptions.java
+++ /dev/null
@@ -1,146 +0,0 @@
-package com.starrocks.connector.datax.plugin.writer.starrockswriter;
-
-import java.io.Serializable;
-
-import com.alibaba.datax.common.exception.DataXException;
-import com.alibaba.datax.common.util.Configuration;
-import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
-
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-
-public class StarRocksWriterOptions implements Serializable {
-
-    private static final long serialVersionUID = 1l;
-    private static final long KILO_BYTES_SCALE = 1024l;
-    private static final long MEGA_BYTES_SCALE = KILO_BYTES_SCALE * KILO_BYTES_SCALE;
-    private static final int MAX_RETRIES = 1;
-    private static final int BATCH_ROWS = 500000;
-    private static final long BATCH_BYTES = 90 * MEGA_BYTES_SCALE;
-
-    private static final String KEY_LOAD_PROPS_FORMAT = "format";
-    public enum StreamLoadFormat {
-        CSV, JSON;
-    }
-
-    private static final String KEY_USERNAME = "username";
-    private static final String KEY_PASSWORD = "password";
-    private static final String KEY_DATABASE = "database";
-    private static final String KEY_TABLE = "table";
-    private static final String KEY_COLUMN = "column";
-    private static final String KEY_PRE_SQL = "preSql";
-    private static final String KEY_POST_SQL = "postSql";
-    private static final String KEY_JDBC_URL = "jdbcUrl";
-    private static final String KEY_MAX_BATCH_ROWS = "maxBatchRows";
-    private static final String KEY_MAX_BATCH_SIZE = "maxBatchSize";
-    private static final String KEY_LOAD_URL = "loadUrl";
-    private static final String KEY_FLUSH_QUEUE_LENGTH = "flushQueueLength";
-    private static final String KEY_LOAD_PROPS = "loadProps";
-
-    private final Configuration options;
-
-    public StarRocksWriterOptions(Configuration options) {
-        this.options = options;
-    }
-
-    public void doPretreatment() {
-        validateRequired();
-        validateStreamLoadUrl();
-    }
-
-    public String getJdbcUrl() {
-        return options.getString(KEY_JDBC_URL);
-    }
-
-    public String getDatabase() {
-        return options.getString(KEY_DATABASE);
-    }
-
-    public String getTable() {
-        return options.getString(KEY_TABLE);
-    }
-
-    public String getUsername() {
-        return options.getString(KEY_USERNAME);
-    }
-
-    public String getPassword() {
-        return options.getString(KEY_PASSWORD);
-    }
-
-    public List<String> getLoadUrlList() {
-        return options.getList(KEY_LOAD_URL, String.class);
-    }
-
-    public List<String> getColumns() {
-        return options.getList(KEY_COLUMN, String.class).stream().map(str -> str.replace("`", "")).collect(Collectors.toList());
-    }
-
-    public List<String> getPreSqlList() {
-        return options.getList(KEY_PRE_SQL, String.class);
-    }
-
-    public List<String> getPostSqlList() {
-        return options.getList(KEY_POST_SQL, String.class);
-    }
-
-    public Map<String, Object> getLoadProps() {
-        return options.getMap(KEY_LOAD_PROPS);
-    }
-
-    public int getMaxRetries() {
-        return MAX_RETRIES;
-    }
-
-    public int getBatchRows() {
-        Integer rows = options.getInt(KEY_MAX_BATCH_ROWS);
-        return null == rows ? BATCH_ROWS : rows;
-    }
-
-    public long getBatchSize() {
-        Long size = options.getLong(KEY_MAX_BATCH_SIZE);
-        return null == size ? BATCH_BYTES : size;
-    }
-
-    public int getFlushQueueLength() {
-        Integer len = options.getInt(KEY_FLUSH_QUEUE_LENGTH);
-        return null == len ? 1 : len;
-    }
-
-    public StreamLoadFormat getStreamLoadFormat() {
-        Map<String, Object> loadProps = getLoadProps();
-        if (null == loadProps) {
-            return StreamLoadFormat.CSV;
-        }
-        if (loadProps.containsKey(KEY_LOAD_PROPS_FORMAT)
-                && StreamLoadFormat.JSON.name().equalsIgnoreCase(String.valueOf(loadProps.get(KEY_LOAD_PROPS_FORMAT)))) {
-            return StreamLoadFormat.JSON;
-        }
-        return StreamLoadFormat.CSV;
-    }
-
-    private void validateStreamLoadUrl() {
-        List<String> urlList = getLoadUrlList();
-        for (String host : urlList) {
-            if (host.split(":").length < 2) {
-                throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR,
-                    "The format of `loadUrl` is incorrect. Please specify it as `fe_ip:fe_http_port;fe_ip:fe_http_port`.");
-            }
-        }
-    }
-
-    private void validateRequired() {
-        final String[] requiredOptionKeys = new String[]{
-            KEY_USERNAME,
-            KEY_PASSWORD,
-            KEY_DATABASE,
-            KEY_TABLE,
-            KEY_COLUMN,
-            KEY_LOAD_URL
-        };
-        for (String optionKey : requiredOptionKeys) {
-            options.getNecessaryValue(optionKey, DBUtilErrorCode.REQUIRED_VALUE);
-        }
-    }
-}
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksFlushTuple.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksFlushTuple.java
deleted file mode 100644
index cd8c663b18..0000000000
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksFlushTuple.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager;
-
-import java.util.List;
-
-public class StarRocksFlushTuple {
-
-    private String label;
-    private Long bytes;
-    private List<String> rows;
-
-    public StarRocksFlushTuple(String label, Long bytes, List<String> rows) {
-        this.label = label;
-        this.bytes = bytes;
-        this.rows = rows;
-    }
-
-    public String getLabel() { return label; }
-    public Long getBytes() { return bytes; }
-    public List<String> getRows() { return rows; }
-}
\ No newline at end of file
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksStreamLoadVisitor.java
deleted file mode 100644
index 84fc5200d0..0000000000
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksStreamLoadVisitor.java
+++ /dev/null
@@ -1,175 +0,0 @@
-package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager;
-
-import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
-
-import com.alibaba.fastjson.JSON;
-import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions;
-import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksDelimiterParser;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.http.HttpEntity;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpPut;
-import org.apache.http.entity.ByteArrayEntity;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.DefaultRedirectStrategy;
-import org.apache.http.impl.client.HttpClientBuilder;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.http.util.EntityUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.List;
-import java.util.Map;
-
-
-public class StarRocksStreamLoadVisitor {
-
-    private static final Logger LOG = LoggerFactory.getLogger(StarRocksStreamLoadVisitor.class);
-
-    private final StarRocksWriterOptions writerOptions;
-    private int pos;
-
-    public StarRocksStreamLoadVisitor(StarRocksWriterOptions writerOptions) {
-        this.writerOptions = writerOptions;
-    }
-
-    public void doStreamLoad(StarRocksFlushTuple flushData) throws IOException {
-        String host = getAvailableHost();
-        if (null == host) {
-            throw new IOException("None of the hosts in `load_url` could be connected.");
-        }
-        String loadUrl = new StringBuilder(host)
-            .append("/api/")
-            .append(writerOptions.getDatabase())
-            .append("/")
-            .append(writerOptions.getTable())
-            .append("/_stream_load")
-            .toString();
-        LOG.debug(String.format("Start to join batch data: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel()));
-        Map<String, Object> loadResult = doHttpPut(loadUrl, flushData.getLabel(), joinRows(flushData.getRows(), flushData.getBytes().intValue()));
-        final String keyStatus = "Status";
-        if (null == loadResult || !loadResult.containsKey(keyStatus)) {
-            throw new IOException("Unable to flush data to StarRocks: unknown result status.");
-        }
-        LOG.debug(new StringBuilder("StreamLoad response:\n").append(JSON.toJSONString(loadResult)).toString());
-        if (loadResult.get(keyStatus).equals("Fail")) {
-            throw new IOException(
-                new StringBuilder("Failed to flush data to StarRocks.\n").append(JSON.toJSONString(loadResult)).toString()
-            );
-        }
-    }
-
-    private String getAvailableHost() {
-        List<String> hostList = writerOptions.getLoadUrlList();
-        if (pos >= hostList.size()) {
-            pos = 0;
-        }
-        for (; pos < hostList.size(); pos++) {
-            String host = new StringBuilder("http://").append(hostList.get(pos)).toString();
-            if (tryHttpConnection(host)) {
-                return host;
-            }
-        }
-        return null;
-    }
-
-    private boolean tryHttpConnection(String host) {
-        try {
-            URL url = new URL(host);
-            HttpURLConnection co = (HttpURLConnection) url.openConnection();
-            co.setConnectTimeout(1000);
-            co.connect();
-            co.disconnect();
-            return true;
-        } catch (Exception e1) {
-            LOG.warn("Failed to connect to address:{}", host, e1);
-            return false;
-        }
-    }
-
-    private byte[] joinRows(List<String> rows, int totalBytes) {
-        if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) {
-            Map<String, Object> props = writerOptions.getLoadProps();
-            byte[] lineDelimiter = StarRocksDelimiterParser.parse(String.valueOf(props.get("row_delimiter")), "\n").getBytes(StandardCharsets.UTF_8);
-            ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length);
-            for (String row : rows) {
-                bos.put(row.getBytes(StandardCharsets.UTF_8));
-                bos.put(lineDelimiter);
-            }
-            return bos.array();
-        }
-
-        if
(StarRocksWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { - ByteBuffer bos = ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 2 : rows.size() + 1)); - bos.put("[".getBytes(StandardCharsets.UTF_8)); - byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8); - boolean isFirstElement = true; - for (String row : rows) { - if (!isFirstElement) { - bos.put(jsonDelimiter); - } - bos.put(row.getBytes(StandardCharsets.UTF_8)); - isFirstElement = false; - } - bos.put("]".getBytes(StandardCharsets.UTF_8)); - return bos.array(); - } - throw new RuntimeException("Failed to join rows data, unsupported `format` from stream load properties:"); - } - - @SuppressWarnings("unchecked") - private Map doHttpPut(String loadUrl, String label, byte[] data) throws IOException { - LOG.info(String.format("Executing stream load to: '%s', size: '%s'", loadUrl, data.length)); - final HttpClientBuilder httpClientBuilder = HttpClients.custom() - .setRedirectStrategy(new DefaultRedirectStrategy() { - @Override - protected boolean isRedirectable(String method) { - return true; - } - }); - try (CloseableHttpClient httpclient = httpClientBuilder.build()) { - HttpPut httpPut = new HttpPut(loadUrl); - List cols = writerOptions.getColumns(); - if (null != cols && !cols.isEmpty()) { - httpPut.setHeader("columns", String.join(",", cols)); - } - if (null != writerOptions.getLoadProps()) { - for (Map.Entry entry : writerOptions.getLoadProps().entrySet()) { - httpPut.setHeader(entry.getKey(), String.valueOf(entry.getValue())); - } - } - httpPut.setHeader("Expect", "100-continue"); - httpPut.setHeader("label", label); - httpPut.setHeader("Content-Type", "application/x-www-form-urlencoded"); - httpPut.setHeader("Authorization", getBasicAuthHeader(writerOptions.getUsername(), writerOptions.getPassword())); - httpPut.setEntity(new ByteArrayEntity(data)); - httpPut.setConfig(RequestConfig.custom().setRedirectsEnabled(true).build()); - try (CloseableHttpResponse resp = httpclient.execute(httpPut)) { - int code = resp.getStatusLine().getStatusCode(); - if (200 != code) { - LOG.warn("Request failed with code:{}", code); - return null; - } - HttpEntity respEntity = resp.getEntity(); - if (null == respEntity) { - LOG.warn("Request failed with empty response."); - return null; - } - return (Map)JSON.parse(EntityUtils.toString(respEntity)); - } - } - } - - private String getBasicAuthHeader(String username, String password) { - String auth = username + ":" + password; - byte[] encodedAuth = Base64.encodeBase64(auth.getBytes()); - return new StringBuilder("Basic ").append(new String(encodedAuth)).toString(); - } - -} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksWriterManager.java deleted file mode 100644 index 4e53adebcb..0000000000 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/manager/StarRocksWriterManager.java +++ /dev/null @@ -1,144 +0,0 @@ -package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; -import java.util.concurrent.LinkedBlockingDeque; - -import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; -import com.google.common.base.Strings; - 
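// Editor's note, not lines of the original diff: this file is being deleted as
// part of the doriswriter -> starrockswriter package move (the identical class
// continues under .../writer/starrockswriter/ in later patches of this series).
// A sketch of how the manager below decouples buffering from Stream Load I/O,
// as the code is written:
//   - writeRecord() appends to an in-memory buffer; reaching maxBatchRows or
//     maxBatchSize triggers flush(label, false).
//   - flush() snapshots the buffer into a StarRocksFlushTuple and puts it on a
//     LinkedBlockingDeque bounded by flushQueueLength, giving back pressure
//     once that many batches are waiting.
//   - A daemon thread drains the queue and calls doStreamLoad(), retrying up
//     to getMaxRetries() times with a linearly growing sleep (1s, 2s, ...).
//   - waitAsyncFlushingDone() enqueues flushQueueLength + 1 empty-label
//     tuples; once all of them are consumed, every real batch queued ahead of
//     them has been flushed.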
-public class StarRocksWriterManager { - - private static final Logger LOG = LoggerFactory.getLogger(StarRocksWriterManager.class); - - private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor; - private final StarRocksWriterOptions writerOptions; - - private final List buffer = new ArrayList<>(); - private int batchCount = 0; - private long batchSize = 0; - private volatile boolean closed = false; - private volatile Exception flushException; - private final LinkedBlockingDeque flushQueue; - - public StarRocksWriterManager(StarRocksWriterOptions writerOptions) { - this.writerOptions = writerOptions; - this.starrocksStreamLoadVisitor = new StarRocksStreamLoadVisitor(writerOptions); - flushQueue = new LinkedBlockingDeque<>(writerOptions.getFlushQueueLength()); - this.startAsyncFlushing(); - } - - public final synchronized void writeRecord(String record) throws IOException { - checkFlushException(); - try { - buffer.add(record); - batchCount++; - batchSize += record.getBytes().length; - if (batchCount >= writerOptions.getBatchRows() || batchSize >= writerOptions.getBatchSize()) { - String label = createBatchLabel(); - LOG.debug(String.format("StarRocks buffer Sinking triggered: rows[%d] label[%s].", batchCount, label)); - flush(label, false); - } - } catch (Exception e) { - throw new IOException("Writing records to StarRocks failed.", e); - } - } - - public synchronized void flush(String label, boolean waitUtilDone) throws Exception { - checkFlushException(); - if (batchCount == 0) { - if (waitUtilDone) { - waitAsyncFlushingDone(); - } - return; - } - flushQueue.put(new StarRocksFlushTuple(label, batchSize, new ArrayList<>(buffer))); - if (waitUtilDone) { - // wait the last flush - waitAsyncFlushingDone(); - } - buffer.clear(); - batchCount = 0; - batchSize = 0; - } - - public synchronized void close() { - if (!closed) { - closed = true; - try { - String label = createBatchLabel(); - if (batchCount > 0) LOG.debug(String.format("StarRocks Sink is about to close: label[%s].", label)); - flush(label, true); - } catch (Exception e) { - throw new RuntimeException("Writing records to StarRocks failed.", e); - } - } - checkFlushException(); - } - - public String createBatchLabel() { - return UUID.randomUUID().toString(); - } - - private void startAsyncFlushing() { - // start flush thread - Thread flushThread = new Thread(new Runnable(){ - public void run() { - while(true) { - try { - asyncFlush(); - } catch (Exception e) { - flushException = e; - } - } - } - }); - flushThread.setDaemon(true); - flushThread.start(); - } - - private void waitAsyncFlushingDone() throws InterruptedException { - // wait previous flushings - for (int i = 0; i <= writerOptions.getFlushQueueLength(); i++) { - flushQueue.put(new StarRocksFlushTuple("", 0l, null)); - } - } - - private void asyncFlush() throws Exception { - StarRocksFlushTuple flushData = flushQueue.take(); - if (Strings.isNullOrEmpty(flushData.getLabel())) { - return; - } - LOG.debug(String.format("Async stream load: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel())); - for (int i = 0; i <= writerOptions.getMaxRetries(); i++) { - try { - // flush to StarRocks with stream load - starrocksStreamLoadVisitor.doStreamLoad(flushData); - LOG.info(String.format("Async stream load finished: label[%s].", flushData.getLabel())); - break; - } catch (Exception e) { - LOG.warn("Failed to flush batch data to StarRocks, retry times = {}", i, e); - if (i >= writerOptions.getMaxRetries()) { - throw new 
IOException(e); - } - try { - Thread.sleep(1000l * (i + 1)); - } catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - throw new IOException("Unable to flush, interrupted while doing another attempt", e); - } - } - } - } - - private void checkFlushException() { - if (flushException != null) { - throw new RuntimeException("Writing records to StarRocks failed.", flushException); - } - } -} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksBaseSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksBaseSerializer.java deleted file mode 100644 index 77d25f1235..0000000000 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksBaseSerializer.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; - -import com.alibaba.datax.common.element.Column; -import com.alibaba.datax.common.element.Column.Type; - -public class StarRocksBaseSerializer { - - protected String fieldConvertion(Column col) { - if (null == col.getRawData()) { - return null; - } - if (Type.BOOL == col.getType()) { - return String.valueOf(col.asLong()); - } - return col.asString(); - } - -} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksCsvSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksCsvSerializer.java deleted file mode 100644 index 1366d57097..0000000000 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksCsvSerializer.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; - -import java.io.StringWriter; - -import com.alibaba.datax.common.element.Record; - -import com.google.common.base.Strings; - -public class StarRocksCsvSerializer extends StarRocksBaseSerializer implements StarRocksISerializer { - - private static final long serialVersionUID = 1L; - - private final String columnSeparator; - - public StarRocksCsvSerializer(String sp) { - this.columnSeparator = StarRocksDelimiterParser.parse(sp, "\t"); - } - - @Override - public String serialize(Record row) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < row.getColumnNumber(); i++) { - String value = fieldConvertion(row.getColumn(i)); - sb.append(null == value ? 
"\\N" : value); - if (i < row.getColumnNumber() - 1) { - sb.append(columnSeparator); - } - } - return sb.toString(); - } - -} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksDelimiterParser.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksDelimiterParser.java deleted file mode 100644 index 04301e0f13..0000000000 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksDelimiterParser.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; - -import java.io.StringWriter; - -import com.google.common.base.Strings; - -public class StarRocksDelimiterParser { - - private static final String HEX_STRING = "0123456789ABCDEF"; - - public static String parse(String sp, String dSp) throws RuntimeException { - if (Strings.isNullOrEmpty(sp)) { - return dSp; - } - if (!sp.toUpperCase().startsWith("\\X")) { - return sp; - } - String hexStr = sp.substring(2); - // check hex str - if (hexStr.isEmpty()) { - throw new RuntimeException("Failed to parse delimiter: `Hex str is empty`"); - } - if (hexStr.length() % 2 != 0) { - throw new RuntimeException("Failed to parse delimiter: `Hex str length error`"); - } - for (char hexChar : hexStr.toUpperCase().toCharArray()) { - if (HEX_STRING.indexOf(hexChar) == -1) { - throw new RuntimeException("Failed to parse delimiter: `Hex str format error`"); - } - } - // transform to separator - StringWriter writer = new StringWriter(); - for (byte b : hexStrToBytes(hexStr)) { - writer.append((char) b); - } - return writer.toString(); - } - - private static byte[] hexStrToBytes(String hexStr) { - String upperHexStr = hexStr.toUpperCase(); - int length = upperHexStr.length() / 2; - char[] hexChars = upperHexStr.toCharArray(); - byte[] bytes = new byte[length]; - for (int i = 0; i < length; i++) { - int pos = i * 2; - bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1])); - } - return bytes; - } - - private static byte charToByte(char c) { - return (byte) HEX_STRING.indexOf(c); - } - -} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksISerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksISerializer.java deleted file mode 100644 index 7bcb89739c..0000000000 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksISerializer.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; - -import java.io.Serializable; - -import com.alibaba.datax.common.element.Record; - -public interface StarRocksISerializer extends Serializable { - - String serialize(Record row); - -} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksJsonSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksJsonSerializer.java deleted file mode 100644 index 60faa1be63..0000000000 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksJsonSerializer.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import 
com.alibaba.datax.common.element.Record; -import com.alibaba.fastjson.JSON; - -public class StarRocksJsonSerializer extends StarRocksBaseSerializer implements StarRocksISerializer { - - private static final long serialVersionUID = 1L; - - private final List fieldNames; - - public StarRocksJsonSerializer(List fieldNames) { - this.fieldNames = fieldNames; - } - - @Override - public String serialize(Record row) { - if (null == fieldNames) { - return ""; - } - Map rowMap = new HashMap<>(fieldNames.size()); - int idx = 0; - for (String fieldName : fieldNames) { - rowMap.put(fieldName, fieldConvertion(row.getColumn(idx))); - idx++; - } - return JSON.toJSONString(rowMap); - } - -} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksSerializerFactory.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksSerializerFactory.java deleted file mode 100644 index 85f446cd93..0000000000 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/row/StarRocksSerializerFactory.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; - -import java.util.Map; - -import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; - -public class StarRocksSerializerFactory { - - private StarRocksSerializerFactory() {} - - public static StarRocksISerializer createSerializer(StarRocksWriterOptions writerOptions) { - if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { - Map props = writerOptions.getLoadProps(); - return new StarRocksCsvSerializer(null == props || !props.containsKey("column_separator") ? null : String.valueOf(props.get("column_separator"))); - } - if (StarRocksWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { - return new StarRocksJsonSerializer(writerOptions.getColumns()); - } - throw new RuntimeException("Failed to create row serializer, unsupported `format` from stream load properties."); - } - -} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/util/StarRocksWriterUtil.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/util/StarRocksWriterUtil.java deleted file mode 100755 index c3b5d8d1d0..0000000000 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/doriswriter/util/StarRocksWriterUtil.java +++ /dev/null @@ -1,83 +0,0 @@ -package com.starrocks.connector.datax.plugin.writer.starrockswriter.util; - -import com.alibaba.datax.plugin.rdbms.util.DBUtil; -import com.alibaba.datax.plugin.rdbms.util.DataBaseType; -import com.alibaba.datax.plugin.rdbms.util.RdbmsException; -import com.alibaba.datax.plugin.rdbms.writer.Constant; -import com.alibaba.druid.sql.parser.ParserException; -import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; -import com.google.common.base.Strings; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.sql.Connection; -import java.sql.Statement; -import java.util.*; - -public final class StarRocksWriterUtil { - private static final Logger LOG = LoggerFactory.getLogger(StarRocksWriterUtil.class); - - private StarRocksWriterUtil() {} - - public static List renderPreOrPostSqls(List preOrPostSqls, String tableName) { - if (null == preOrPostSqls) { - return Collections.emptyList(); - } - List renderedSqls = new 
ArrayList<>(); - for (String sql : preOrPostSqls) { - if (!Strings.isNullOrEmpty(sql)) { - renderedSqls.add(sql.replace(Constant.TABLE_NAME_PLACEHOLDER, tableName)); - } - } - return renderedSqls; - } - - public static void executeSqls(Connection conn, List sqls) { - Statement stmt = null; - String currentSql = null; - try { - stmt = conn.createStatement(); - for (String sql : sqls) { - currentSql = sql; - DBUtil.executeSqlWithoutResultSet(stmt, sql); - } - } catch (Exception e) { - throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null); - } finally { - DBUtil.closeDBResources(null, stmt, null); - } - } - - public static void preCheckPrePareSQL(StarRocksWriterOptions options) { - String table = options.getTable(); - List preSqls = options.getPreSqlList(); - List renderedPreSqls = StarRocksWriterUtil.renderPreOrPostSqls(preSqls, table); - if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { - LOG.info("Begin to preCheck preSqls:[{}].", String.join(";", renderedPreSqls)); - for (String sql : renderedPreSqls) { - try { - DBUtil.sqlValid(sql, DataBaseType.MySql); - } catch (ParserException e) { - throw RdbmsException.asPreSQLParserException(DataBaseType.MySql,e,sql); - } - } - } - } - - public static void preCheckPostSQL(StarRocksWriterOptions options) { - String table = options.getTable(); - List postSqls = options.getPostSqlList(); - List renderedPostSqls = StarRocksWriterUtil.renderPreOrPostSqls(postSqls, table); - if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { - LOG.info("Begin to preCheck postSqls:[{}].", String.join(";", renderedPostSqls)); - for(String sql : renderedPostSqls) { - try { - DBUtil.sqlValid(sql, DataBaseType.MySql); - } catch (ParserException e){ - throw RdbmsException.asPostSQLParserException(DataBaseType.MySql,e,sql); - } - } - } - } -} From 1e03c200f71ea3f042d9b69f1cb1fa511fe6062d Mon Sep 17 00:00:00 2001 From: fariel Date: Wed, 1 Sep 2021 20:37:00 +0800 Subject: [PATCH 23/50] support wildcard column --- build.sh | 1 + .../starrockswriter/StarRocksWriter.java | 5 +++++ .../StarRocksWriterOptions.java | 20 ++++++++++++++++++- .../util/StarRocksWriterUtil.java | 19 ++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) create mode 100755 build.sh diff --git a/build.sh b/build.sh new file mode 100755 index 0000000000..e3992acba1 --- /dev/null +++ b/build.sh @@ -0,0 +1 @@ +mvn -U -pl starrockswriter -am clean package assembly:assembly -Dmaven.test.skip=true diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java index 666a99d962..9d8fdf15ef 100755 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java @@ -93,6 +93,11 @@ public static class Task extends Writer.Task { @Override public void init() { options = new StarRocksWriterOptions(super.getPluginJobConf()); + if (options.isWildcardColumn()) { + Connection conn = DBUtil.getConnection(DataBaseType.MySql, options.getJdbcUrl(), options.getUsername(), options.getPassword()); + List columns = StarRocksWriterUtil.getStarRocksColumns(conn, options.getDatabase(), options.getTable()); + options.setInfoCchemaColumns(columns); + } writerManager = new StarRocksWriterManager(options); rowSerializer = 
StarRocksSerializerFactory.createSerializer(options); } diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java index 5180512f59..9e4abd1248 100644 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java @@ -39,9 +39,16 @@ public enum StreamLoadFormat { private static final String KEY_LOAD_PROPS = "loadProps"; private final Configuration options; + private List infoCchemaColumns; + private List userSetColumns; + private boolean isWildcardColumn; public StarRocksWriterOptions(Configuration options) { this.options = options; + this.userSetColumns = options.getList(KEY_COLUMN, String.class).stream().map(str -> str.replace("`", "")).collect(Collectors.toList()); + if (1 == options.getList(KEY_COLUMN, String.class).size() && "*".trim().equals(options.getList(KEY_COLUMN, String.class).get(0))) { + this.isWildcardColumn = true; + } } public void doPretreatment() { @@ -74,7 +81,18 @@ public List getLoadUrlList() { } public List getColumns() { - return options.getList(KEY_COLUMN, String.class).stream().map(str -> str.replace("`", "")).collect(Collectors.toList()); + if (isWildcardColumn) { + return this.infoCchemaColumns; + } + return this.userSetColumns; + } + + public boolean isWildcardColumn() { + return this.isWildcardColumn; + } + + public void setInfoCchemaColumns(List cols) { + this.infoCchemaColumns = cols; } public List getPreSqlList() { diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java index c3b5d8d1d0..8de4ad60be 100755 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java @@ -12,6 +12,7 @@ import org.slf4j.LoggerFactory; import java.sql.Connection; +import java.sql.ResultSet; import java.sql.Statement; import java.util.*; @@ -20,6 +21,24 @@ public final class StarRocksWriterUtil { private StarRocksWriterUtil() {} + public static List getStarRocksColumns(Connection conn, String databaseName, String tableName) { + String currentSql = String.format("SELECT COLUMN_NAME FROM `information_schema`.`COLUMNS` WHERE `TABLE_SCHEMA` = '%s' AND `TABLE_NAME` = '%s' ORDER BY `ORDINAL_POSITION` ASC;", databaseName, tableName); + List columns = new ArrayList<>(); + ResultSet rs = null; + try { + rs = DBUtil.query(conn, currentSql); + while (DBUtil.asyncResultSetNext(rs)) { + String colName = rs.getString("COLUMN_NAME"); + columns.add(colName); + } + return columns; + } catch (Exception e) { + throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null); + } finally { + DBUtil.closeDBResources(rs, null, null); + } + } + public static List renderPreOrPostSqls(List preOrPostSqls, String tableName) { if (null == preOrPostSqls) { return Collections.emptyList(); From 16a61519191bf73ea82552d0ceb4658138ac9143 Mon Sep 17 00:00:00 2001 From: fariel Date: Thu, 9 Sep 2021 15:30:01 +0800 Subject: [PATCH 24/50] fix npe 
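Editor's note: the hunk below drops the `containsKey("row_delimiter")` guard. What is certain from the parser shown earlier in this series is that `StarRocksDelimiterParser.parse` already falls back to the supplied default when its input is null or empty, and that the oldest form, `String.valueOf(props.get(...))`, would turn a missing key into the literal four-character string "null". A minimal check of those two facts, assuming the parser class from these diffs is on the classpath (the harness class name is made up):

```java
import java.util.HashMap;
import java.util.Map;

public class DelimiterDefaultCheck {
    public static void main(String[] args) {
        Map<String, Object> props = new HashMap<>(); // "row_delimiter" not set
        // Old pitfall: String.valueOf(null) is the string "null", not a newline.
        System.out.println(String.valueOf(props.get("row_delimiter"))); // prints: null
        // New path: parse() returns the supplied default for null/empty input.
        System.out.println("\n".equals(
                StarRocksDelimiterParser.parse((String) props.get("row_delimiter"), "\n"))); // true
    }
}
```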
--- .../starrockswriter/manager/StarRocksStreamLoadVisitor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java index 15e93e87e1..a9d1062158 100644 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java @@ -96,7 +96,7 @@ private boolean tryHttpConnection(String host) { private byte[] joinRows(List rows, int totalBytes) { if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { Map props = writerOptions.getLoadProps(); - byte[] lineDelimiter = (props.containsKey("row_delimiter") ? StarRocksDelimiterParser.parse(String.valueOf(props.get("row_delimiter")), "\n") : "\n").getBytes(StandardCharsets.UTF_8); + byte[] lineDelimiter = StarRocksDelimiterParser.parse((String)props.get("row_delimiter"), "\n").getBytes(StandardCharsets.UTF_8); ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length); for (String row : rows) { bos.put(row.getBytes(StandardCharsets.UTF_8)); From b725a83ac13c17e082726267b0cc068761cd55da Mon Sep 17 00:00:00 2001 From: fariel Date: Fri, 10 Sep 2021 15:10:13 +0800 Subject: [PATCH 25/50] bumping version to 1.1.0 --- README.md | 157 ++-------------------------------------- build.sh | 4 + starrockswriter/pom.xml | 1 + 3 files changed, 11 insertions(+), 151 deletions(-) diff --git a/README.md b/README.md index 7fb4c5eb39..9b271464ec 100644 --- a/README.md +++ b/README.md @@ -1,154 +1,9 @@ -![Datax-logo](https://github.com/alibaba/DataX/blob/master/images/DataX-logo.jpg) +# StarRocks writer for DataX +This is a repo forked from [DataX](`https://github.com/alibaba/DataX`), and maintained by [StarRocks](https://www.dorisdb.com) (starrockswriter). 
-# DataX - -DataX 是阿里巴巴集团内被广泛使用的离线数据同步工具/平台,实现包括 MySQL、Oracle、SqlServer、Postgre、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、DRDS 等各种异构数据源之间高效的数据同步功能。 - -# DataX 商业版本 -阿里云DataWorks数据集成是DataX团队在阿里云上的商业化产品,致力于提供复杂网络环境下、丰富的异构数据源之间高速稳定的数据移动能力,以及繁杂业务背景下的数据同步解决方案。目前已经支持云上近3000家客户,单日同步数据超过3万亿条。DataWorks数据集成目前支持离线50+种数据源,可以进行整库迁移、批量上云、增量同步、分库分表等各类同步解决方案。2020年更新实时同步能力,2020年更新实时同步能力,支持10+种数据源的读写任意组合。提供MySQL,Oracle等多种数据源到阿里云MaxCompute,Hologres等大数据引擎的一键全增量同步解决方案。 - -https://www.aliyun.com/product/bigdata/ide - - -# Features - -DataX本身作为数据同步框架,将不同数据源的同步抽象为从源头数据源读取数据的Reader插件,以及向目标端写入数据的Writer插件,理论上DataX框架可以支持任意数据源类型的数据同步工作。同时DataX插件体系作为一套生态系统, 每接入一套新数据源该新加入的数据源即可实现和现有的数据源互通。 - - - -# DataX详细介绍 - -##### 请参考:[DataX-Introduction](https://github.com/alibaba/DataX/blob/master/introduction.md) - - - -# Quick Start - -##### Download [DataX下载地址](http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz) - -##### 请点击:[Quick Start](https://github.com/alibaba/DataX/blob/master/userGuid.md) - - - -# Support Data Channels - -DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、NOSQL、大数据计算系统都已经接入,目前支持数据如下图,详情请点击:[DataX数据源参考指南](https://github.com/alibaba/DataX/wiki/DataX-all-data-channels) - -| 类型 | 数据源 | Reader(读) | Writer(写) |文档| -| ------------ | ---------- | :-------: | :-------: |:-------: | -| RDBMS 关系型数据库 | MySQL | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mysqlwriter/doc/mysqlwriter.md)| -|             | Oracle     |     √     |     √     |[读](https://github.com/alibaba/DataX/blob/master/oraclereader/doc/oraclereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/oraclewriter/doc/oraclewriter.md)| -| | SQLServer | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/sqlserverreader/doc/sqlserverreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/sqlserverwriter/doc/sqlserverwriter.md)| -| | PostgreSQL | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/postgresqlreader/doc/postgresqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/postgresqlwriter/doc/postgresqlwriter.md)| -| | DRDS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/drdsreader/doc/drdsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/drdswriter/doc/drdswriter.md)| -| | 通用RDBMS(支持所有关系型数据库) | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/rdbmsreader/doc/rdbmsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/rdbmswriter/doc/rdbmswriter.md)| -| 阿里云数仓数据存储 | ODPS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/odpsreader/doc/odpsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/odpswriter/doc/odpswriter.md)| -| | ADS | | √ |[写](https://github.com/alibaba/DataX/blob/master/adswriter/doc/adswriter.md)| -| | OSS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/ossreader/doc/ossreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/osswriter/doc/osswriter.md)| -| | OCS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/ocsreader/doc/ocsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/ocswriter/doc/ocswriter.md)| -| NoSQL数据存储 | OTS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/otsreader/doc/otsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/otswriter/doc/otswriter.md)| -| | Hbase0.94 | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hbase094xreader/doc/hbase094xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase094xwriter/doc/hbase094xwriter.md)| -| | Hbase1.1 | √ | √ 
|[读](https://github.com/alibaba/DataX/blob/master/hbase11xreader/doc/hbase11xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xwriter/doc/hbase11xwriter.md)| -| | Phoenix4.x | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hbase11xsqlreader/doc/hbase11xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xsqlwriter/doc/hbase11xsqlwriter.md)| -| | Phoenix5.x | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hbase20xsqlreader/doc/hbase20xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase20xsqlwriter/doc/hbase20xsqlwriter.md)| -| | MongoDB | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/mongodbreader/doc/mongodbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mongodbwriter/doc/mongodbwriter.md)| -| | Hive | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md)| -| | Cassandra | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/cassandrareader/doc/cassandrareader.md) 、[写](https://github.com/alibaba/DataX/blob/master/cassandrawriter/doc/cassandrawriter.md)| -| 无结构化数据存储 | TxtFile | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/txtfilereader/doc/txtfilereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/txtfilewriter/doc/txtfilewriter.md)| -| | FTP | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/ftpreader/doc/ftpreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/ftpwriter/doc/ftpwriter.md)| -| | HDFS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md)| -| | Elasticsearch | | √ |[写](https://github.com/alibaba/DataX/blob/master/elasticsearchwriter/doc/elasticsearchwriter.md)| -| 时间序列数据库 | OpenTSDB | √ | |[读](https://github.com/alibaba/DataX/blob/master/opentsdbreader/doc/opentsdbreader.md)| -| | TSDB | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/tsdbreader/doc/tsdbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/tsdbwriter/doc/tsdbhttpwriter.md)| - -# 阿里云DataWorks数据集成 - -目前DataX的已有能力已经全部融和进阿里云的数据集成,并且比DataX更加高效、安全,同时数据集成具备DataX不具备的其它高级特性和功能。可以理解为数据集成是DataX的全面升级的商业化用版本,为企业可以提供稳定、可靠、安全的数据传输服务。与DataX相比,数据集成主要有以下几大突出特点: - -支持实时同步: - -- 功能简介:https://help.aliyun.com/document_detail/181912.html -- 支持的数据源:https://help.aliyun.com/document_detail/146778.html -- 支持数据处理:https://help.aliyun.com/document_detail/146777.html - -离线同步数据源种类大幅度扩充: - -- 新增比如:DB2、Kafka、Hologres、MetaQ、SAPHANA、达梦等等,持续扩充中 -- 离线同步支持的数据源:https://help.aliyun.com/document_detail/137670.html -- 具备同步解决方案: - - 解决方案系统:https://help.aliyun.com/document_detail/171765.html - - 一键全增量:https://help.aliyun.com/document_detail/175676.html - - 整库迁移:https://help.aliyun.com/document_detail/137809.html - - 批量上云:https://help.aliyun.com/document_detail/146671.html - - 更新更多能力请访问:https://help.aliyun.com/document_detail/137663.html - - -# 我要开发新的插件 - -请点击:[DataX插件开发宝典](https://github.com/alibaba/DataX/blob/master/dataxPluginDev.md) - - -# 项目成员 - -核心Contributions: 言柏 、枕水、秋奇、青砾、一斅、云时 - -感谢天烬、光戈、祁然、巴真、静行对DataX做出的贡献。 - -# License - -This software is free to use under the Apache License [Apache license](https://github.com/alibaba/DataX/blob/master/license.txt). 
- -# -请及时提出issue给我们。请前往:[DataxIssue](https://github.com/alibaba/DataX/issues) - -# 开源版DataX企业用户 - -![Datax-logo](https://github.com/alibaba/DataX/blob/master/images/datax-enterprise-users.jpg) - -``` -长期招聘 联系邮箱:datax@alibabacloud.com -【JAVA开发职位】 -职位名称:JAVA资深开发工程师/专家/高级专家 -工作年限 : 2年以上 -学历要求 : 本科(如果能力靠谱,这些都不是条件) -期望层级 : P6/P7/P8 - -岗位描述: - 1. 负责阿里云大数据平台(数加)的开发设计。 - 2. 负责面向政企客户的大数据相关产品开发; - 3. 利用大规模机器学习算法挖掘数据之间的联系,探索数据挖掘技术在实际场景中的产品应用 ; - 4. 一站式大数据开发平台 - 5. 大数据任务调度引擎 - 6. 任务执行引擎 - 7. 任务监控告警 - 8. 海量异构数据同步 - -岗位要求: - 1. 拥有3年以上JAVA Web开发经验; - 2. 熟悉Java的基础技术体系。包括JVM、类装载、线程、并发、IO资源管理、网络; - 3. 熟练使用常用Java技术框架、对新技术框架有敏锐感知能力;深刻理解面向对象、设计原则、封装抽象; - 4. 熟悉HTML/HTML5和JavaScript;熟悉SQL语言; - 5. 执行力强,具有优秀的团队合作精神、敬业精神; - 6. 深刻理解设计模式及应用场景者加分; - 7. 具有较强的问题分析和处理能力、比较强的动手能力,对技术有强烈追求者优先考虑; - 8. 对高并发、高稳定可用性、高性能、大数据处理有过实际项目及产品经验者优先考虑; - 9. 有大数据产品、云产品、中间件技术解决方案者优先考虑。 -```` -钉钉用户群: - -- DataX开源用户交流群 - - - -- DataX开源用户交流群2 - - - -- DataX开源用户交流群3 - - - -- DataX开源用户交流群4 - - - -- DataX开源用户交流群5 - - +## How to use +1. Run `./build.sh` to gennerate the `starrockswriter.tar.gz`, then untar it into your own [DataX release](`https://github.com/alibaba/DataX`) directory(which will be `datax/plugin/writer/`). +2. Create a `job.json` to define the reader and writer. More details about the configurations, please refer to `https://docs.dorisdb.com`. +3. Run `python datax/bin/datax.py --jvm="-Xms6G -Xmx6G" --loglevel=debug job.json` to start a job. diff --git a/build.sh b/build.sh index e3992acba1..e560ac8d22 100755 --- a/build.sh +++ b/build.sh @@ -1 +1,5 @@ +ShellDir="$( cd "$( dirname "$0" )" && pwd )" +cd $ShellDir mvn -U -pl starrockswriter -am clean package assembly:assembly -Dmaven.test.skip=true +rm -f starrockswriter.tar.gz +tar -czvf starrockswriter.tar.gz target/datax/datax/plugin/writer/starrockswriter diff --git a/starrockswriter/pom.xml b/starrockswriter/pom.xml index f36fc4b578..9398b9ff19 100755 --- a/starrockswriter/pom.xml +++ b/starrockswriter/pom.xml @@ -8,6 +8,7 @@ starrockswriter starrockswriter + 1.1.0 jar From 014b240a1ff9710d018b549fca30c49907defe05 Mon Sep 17 00:00:00 2001 From: fariel Date: Fri, 10 Sep 2021 15:16:51 +0800 Subject: [PATCH 26/50] optimize build script --- build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.sh b/build.sh index e560ac8d22..1dd4fac6c1 100755 --- a/build.sh +++ b/build.sh @@ -2,4 +2,5 @@ ShellDir="$( cd "$( dirname "$0" )" && pwd )" cd $ShellDir mvn -U -pl starrockswriter -am clean package assembly:assembly -Dmaven.test.skip=true rm -f starrockswriter.tar.gz -tar -czvf starrockswriter.tar.gz target/datax/datax/plugin/writer/starrockswriter +cd target/datax/datax/plugin/writer/ +tar -czvf starrockswriter.tar.gz starrockswriter From 629fa484120574dc6521e288c37826d0cf7f2c49 Mon Sep 17 00:00:00 2001 From: fariel Date: Fri, 10 Sep 2021 15:18:38 +0800 Subject: [PATCH 27/50] build starrockswriter.tar.gz --- build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build.sh b/build.sh index 1dd4fac6c1..609aa2bb12 100755 --- a/build.sh +++ b/build.sh @@ -4,3 +4,4 @@ mvn -U -pl starrockswriter -am clean package assembly:assembly -Dmaven.test.skip rm -f starrockswriter.tar.gz cd target/datax/datax/plugin/writer/ tar -czvf starrockswriter.tar.gz starrockswriter +mv starrockswriter.tar.gz $ShellDir From e41e89330b7c19cf6ec634d4619756482f3319d4 Mon Sep 17 00:00:00 2001 From: fariel Date: Fri, 10 Sep 2021 15:21:44 +0800 Subject: [PATCH 28/50] modify readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 
9b271464ec..2280a13f2f 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # StarRocks writer for DataX -This is a repo forked from [DataX](`https://github.com/alibaba/DataX`), and maintained by [StarRocks](https://www.dorisdb.com) (starrockswriter). +This is a repo forked from [DataX](https://github.com/alibaba/DataX), and maintained by [StarRocks](https://www.dorisdb.com) (starrockswriter). ## How to use -1. Run `./build.sh` to generate the `starrockswriter.tar.gz`, then untar it into your own [DataX release](`https://github.com/alibaba/DataX`) directory (which will be `datax/plugin/writer/`). +1. Run `./build.sh` to generate the `starrockswriter.tar.gz`, then untar it into your own [DataX release](https://github.com/alibaba/DataX) directory (which will be `datax/plugin/writer/`). 2. Create a `job.json` to define the reader and writer. More details about the configurations, please refer to `https://docs.dorisdb.com`. 3. Run `python datax/bin/datax.py --jvm="-Xms6G -Xmx6G" --loglevel=debug job.json` to start a job.

From 1ab7dde5d28de9b3017aec1126be1b3a5fed43f4 Mon Sep 17 00:00:00 2001 From: hffariel Date: Wed, 15 Sep 2021 14:31:01 +0800 Subject: [PATCH 29/50] modify README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2280a13f2f..0ff5038715 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # StarRocks writer for DataX -This is a repo forked from [DataX](https://github.com/alibaba/DataX), and maintained by [StarRocks](https://www.dorisdb.com) (starrockswriter). +This is a repo forked from [DataX](https://github.com/alibaba/DataX), and maintained by [StarRocks](https://www.starrocks.com) (starrockswriter). ## How to use 1. Run `./build.sh` to generate the `starrockswriter.tar.gz`, then untar it into your own [DataX release](https://github.com/alibaba/DataX) directory (which will be `datax/plugin/writer/`). -2. Create a `job.json` to define the reader and writer. More details about the configurations, please refer to `https://docs.dorisdb.com`. +2. Create a `job.json` to define the reader and writer. More details about the configurations, please refer to the [documentation](https://docs.starrocks.com). 3. Run `python datax/bin/datax.py --jvm="-Xms6G -Xmx6G" --loglevel=debug job.json` to start a job.
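Editor's note: the usage steps above cover packaging and launching; under the hood, every batch the writer flushes becomes a single Stream Load HTTP PUT, as implemented in `StarRocksStreamLoadVisitor` earlier in this series. A minimal standalone sketch of that request follows; host, credentials, database, and table are placeholders, and the real plugin additionally sets a `columns` header and enables redirects so the FE can forward the load:

```java
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.UUID;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

// A sketch only, assuming HttpClient 4.x (the dependency the plugin itself uses).
public class StreamLoadSketch {
    public static void main(String[] args) throws Exception {
        // URL layout from the visitor: http://<fe_host>:<fe_http_port>/api/<db>/<table>/_stream_load
        String url = "http://127.0.0.1:8030/api/datax_test1/table1/_stream_load";
        byte[] body = "1\t2\tv1\tv2\n".getBytes(StandardCharsets.UTF_8); // one CSV row

        HttpPut put = new HttpPut(url);
        put.setHeader("Expect", "100-continue");              // the plugin always sets this
        put.setHeader("label", UUID.randomUUID().toString()); // intended to make retries safe
        put.setHeader("Authorization", "Basic " + Base64.getEncoder()
                .encodeToString("root:".getBytes(StandardCharsets.UTF_8)));
        put.setEntity(new ByteArrayEntity(body));

        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse resp = client.execute(put)) {
            // The writer inspects the "Status" field of this JSON and fails on "Fail".
            System.out.println(EntityUtils.toString(resp.getEntity()));
        }
    }
}
```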
From 39c7065526725f7994f8c55d364362840b7af876 Mon Sep 17 00:00:00 2001 From: hffariel Date: Wed, 15 Sep 2021 14:55:30 +0800 Subject: [PATCH 30/50] fix build issue --- starrockswriter/src/main/assembly/package.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/starrockswriter/src/main/assembly/package.xml b/starrockswriter/src/main/assembly/package.xml index 79ca7be84a..c63845b471 100755 --- a/starrockswriter/src/main/assembly/package.xml +++ b/starrockswriter/src/main/assembly/package.xml @@ -19,7 +19,7 @@ target/ - starrockswriter-0.0.1-SNAPSHOT.jar + starrockswriter-1.1.0.jar plugin/writer/starrockswriter From 9e9fd10b445594d935e618676897ef5f2f71e7c1 Mon Sep 17 00:00:00 2001 From: hffariel Date: Wed, 15 Sep 2021 19:58:29 +0800 Subject: [PATCH 31/50] fix charset to utf-8 --- .../starrockswriter/manager/StarRocksStreamLoadVisitor.java | 2 +- .../writer/starrockswriter/manager/StarRocksWriterManager.java | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java index a9d1062158..aecac149f2 100644 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java @@ -168,7 +168,7 @@ protected boolean isRedirectable(String method) { private String getBasicAuthHeader(String username, String password) { String auth = username + ":" + password; - byte[] encodedAuth = Base64.encodeBase64(auth.getBytes()); + byte[] encodedAuth = Base64.encodeBase64(auth.getBytes(StandardCharsets.UTF_8)); return new StringBuilder("Basic ").append(new String(encodedAuth)).toString(); } diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java index e523442597..0f68afe97b 100644 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java @@ -4,6 +4,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.UUID; @@ -38,7 +39,7 @@ public final synchronized void writeRecord(String record) throws IOException { try { buffer.add(record); batchCount++; - batchSize += record.getBytes().length; + batchSize += record.getBytes(StandardCharsets.UTF_8).length; if (batchCount >= writerOptions.getBatchRows() || batchSize >= writerOptions.getBatchSize()) { String label = createBatchLabel(); LOG.debug(String.format("StarRocks buffer Sinking triggered: rows[%d] label[%s].", batchCount, label)); From 8fd01d22ca4d4c0f2598d775b7abbd36ef4009b2 Mon Sep 17 00:00:00 2001 From: hffariel Date: Thu, 16 Sep 2021 16:38:24 +0800 Subject: [PATCH 32/50] remove aliyun mirror to do the auto releasing --- pom.xml | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/pom.xml b/pom.xml index 5f194b06ed..7d03f13285 100644 
--- a/pom.xml +++ b/pom.xml @@ -183,34 +183,6 @@ - - - central - Nexus aliyun - https://maven.aliyun.com/repository/central - - true - - - true - - - - - - - central - Nexus aliyun - https://maven.aliyun.com/repository/central - - true - - - true - - - - From e7d879185b059a6bf03d19c2bca67e8516fe5680 Mon Sep 17 00:00:00 2001 From: hffariel Date: Fri, 17 Sep 2021 19:08:21 +0800 Subject: [PATCH 33/50] add auto-release workflow --- .github/workflows/release.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000000..27435856b6 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,30 @@ +name: Release + +on: + push: + tags: + - "v*.*.*" + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Set up Maven Central Repository + uses: actions/setup-java@v2 + with: + java-version: '8' + distribution: 'adopt' + - name: Checkout + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - run: | + git checkout main + mvn -U -pl starrockswriter -am clean package assembly:assembly -Dmaven.test.skip=true + cd target/datax/datax/plugin/writer/ + tar -czvf starrockswriter.tar.gz starrockswriter + - name: Release + uses: softprops/action-gh-release@v1 + with: + files: | + target/datax/datax/plugin/writer/starrockswriter.tar.gz From fe43e20ea92a03b3b50f98068407446b81d1cdc0 Mon Sep 17 00:00:00 2001 From: hffariel Date: Thu, 23 Sep 2021 17:15:14 +0800 Subject: [PATCH 34/50] reduce the getBytes operations --- .../starrockswriter/manager/StarRocksFlushTuple.java | 6 +++--- .../manager/StarRocksStreamLoadVisitor.java | 10 +++++----- .../manager/StarRocksWriterManager.java | 7 ++++--- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java index cd8c663b18..47cebb912d 100644 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java @@ -6,9 +6,9 @@ public class StarRocksFlushTuple { private String label; private Long bytes; - private List rows; + private List rows; - public StarRocksFlushTuple(String label, Long bytes, List rows) { + public StarRocksFlushTuple(String label, Long bytes, List rows) { this.label = label; this.bytes = bytes; this.rows = rows; @@ -16,5 +16,5 @@ public StarRocksFlushTuple(String label, Long bytes, List rows) { public String getLabel() { return label; } public Long getBytes() { return bytes; } - public List getRows() { return rows; } + public List getRows() { return rows; } } \ No newline at end of file diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java index aecac149f2..656f402096 100644 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java +++ 
b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java @@ -93,13 +93,13 @@ private boolean tryHttpConnection(String host) { } } - private byte[] joinRows(List rows, int totalBytes) { + private byte[] joinRows(List rows, int totalBytes) { if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { Map props = writerOptions.getLoadProps(); byte[] lineDelimiter = StarRocksDelimiterParser.parse((String)props.get("row_delimiter"), "\n").getBytes(StandardCharsets.UTF_8); ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length); - for (String row : rows) { - bos.put(row.getBytes(StandardCharsets.UTF_8)); + for (byte[] row : rows) { + bos.put(row); bos.put(lineDelimiter); } return bos.array(); @@ -110,11 +110,11 @@ private byte[] joinRows(List rows, int totalBytes) { bos.put("[".getBytes(StandardCharsets.UTF_8)); byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8); boolean isFirstElement = true; - for (String row : rows) { + for (byte[] row : rows) { if (!isFirstElement) { bos.put(jsonDelimiter); } - bos.put(row.getBytes(StandardCharsets.UTF_8)); + bos.put(row); isFirstElement = false; } bos.put("]".getBytes(StandardCharsets.UTF_8)); diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java index 0f68afe97b..0e89005f1d 100644 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java @@ -20,7 +20,7 @@ public class StarRocksWriterManager { private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor; private final StarRocksWriterOptions writerOptions; - private final List buffer = new ArrayList<>(); + private final List buffer = new ArrayList<>(); private int batchCount = 0; private long batchSize = 0; private volatile boolean closed = false; @@ -37,9 +37,10 @@ public StarRocksWriterManager(StarRocksWriterOptions writerOptions) { public final synchronized void writeRecord(String record) throws IOException { checkFlushException(); try { - buffer.add(record); + byte[] bts = record.getBytes(StandardCharsets.UTF_8); + buffer.add(bts); batchCount++; - batchSize += record.getBytes(StandardCharsets.UTF_8).length; + batchSize += bts.length; if (batchCount >= writerOptions.getBatchRows() || batchSize >= writerOptions.getBatchSize()) { String label = createBatchLabel(); LOG.debug(String.format("StarRocks buffer Sinking triggered: rows[%d] label[%s].", batchCount, label)); From cbc93b98ff8d3ab63c37659a590b8f8759d6a028 Mon Sep 17 00:00:00 2001 From: hffariel Date: Fri, 24 Sep 2021 11:34:55 +0800 Subject: [PATCH 35/50] add interval sinking --- starrockswriter/doc/starrockswriter.md | 8 ++++ .../StarRocksWriterOptions.java | 7 ++++ .../manager/StarRocksWriterManager.java | 39 +++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/starrockswriter/doc/starrockswriter.md b/starrockswriter/doc/starrockswriter.md index f471b75212..1080d072cb 100644 --- a/starrockswriter/doc/starrockswriter.md +++ b/starrockswriter/doc/starrockswriter.md @@ -173,6 +173,14 @@ StarRocksWriter 插件实现了写入数据到 StarRocks 主库的目的表的 * 默认值:104857600 (100M) +* **flushInterval** + 
+ * 描述:上一次StreamLoad结束至下一次开始的时间间隔(单位:ms)。
+ + * 必选:否
+ + * 默认值:300000 (ms) + * **loadProps** * 描述:StreamLoad 的请求参数,详情参照StreamLoad介绍页面。
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java index 9e4abd1248..be3fa3ecb8 100644 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java @@ -18,6 +18,7 @@ public class StarRocksWriterOptions implements Serializable { private static final int MAX_RETRIES = 1; private static final int BATCH_ROWS = 500000; private static final long BATCH_BYTES = 90 * MEGA_BYTES_SCALE; + private static final long FLUSH_INTERVAL = 300000; private static final String KEY_LOAD_PROPS_FORMAT = "format"; public enum StreamLoadFormat { @@ -34,6 +35,7 @@ public enum StreamLoadFormat { private static final String KEY_JDBC_URL = "jdbcUrl"; private static final String KEY_MAX_BATCH_ROWS = "maxBatchRows"; private static final String KEY_MAX_BATCH_SIZE = "maxBatchSize"; + private static final String KEY_FLUSH_INTERVAL = "flushInterval"; private static final String KEY_LOAD_URL = "loadUrl"; private static final String KEY_FLUSH_QUEUE_LENGTH = "flushQueueLength"; private static final String KEY_LOAD_PROPS = "loadProps"; @@ -120,6 +122,11 @@ public long getBatchSize() { Long size = options.getLong(KEY_MAX_BATCH_SIZE); return null == size ? BATCH_BYTES : size; } + + public long getFlushInterval() { + Long interval = options.getLong(KEY_FLUSH_INTERVAL); + return null == interval ? FLUSH_INTERVAL : interval; + } public int getFlushQueueLength() { Integer len = options.getInt(KEY_FLUSH_QUEUE_LENGTH); diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java index 0e89005f1d..f32eccdefe 100644 --- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java @@ -1,5 +1,6 @@ package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager; +import org.apache.commons.lang3.concurrent.BasicThreadFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -8,7 +9,11 @@ import java.util.ArrayList; import java.util.List; import java.util.UUID; +import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; import com.google.common.base.Strings; import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; @@ -26,14 +31,45 @@ public class StarRocksWriterManager { private volatile boolean closed = false; private volatile Exception flushException; private final LinkedBlockingDeque flushQueue; + private ScheduledExecutorService scheduler; + private ScheduledFuture scheduledFuture; public StarRocksWriterManager(StarRocksWriterOptions writerOptions) { this.writerOptions = writerOptions; this.starrocksStreamLoadVisitor = new StarRocksStreamLoadVisitor(writerOptions); flushQueue = new LinkedBlockingDeque<>(writerOptions.getFlushQueueLength()); + 
         this.startAsyncFlushing();
     }
 
+    public void startScheduler() {
+        stopScheduler();
+        this.scheduler = Executors.newScheduledThreadPool(1, new BasicThreadFactory.Builder().namingPattern("starrocks-interval-flush").daemon(true).build());
+        this.scheduledFuture = this.scheduler.schedule(() -> {
+            synchronized (StarRocksWriterManager.this) {
+                if (!closed) {
+                    try {
+                        String label = createBatchLabel();
+                        LOG.info(String.format("StarRocks interval Sinking triggered: label[%s].", label));
+                        if (batchCount == 0) {
+                            startScheduler();
+                        }
+                        flush(label, false);
+                    } catch (Exception e) {
+                        flushException = e;
+                    }
+                }
+            }
+        }, writerOptions.getFlushInterval(), TimeUnit.MILLISECONDS);
+    }
+
+    public void stopScheduler() {
+        if (this.scheduledFuture != null) {
+            scheduledFuture.cancel(false);
+            this.scheduler.shutdown();
+        }
+    }
+
     public final synchronized void writeRecord(String record) throws IOException {
         checkFlushException();
         try {
@@ -109,6 +145,7 @@ private void waitAsyncFlushingDone() throws InterruptedException {
         for (int i = 0; i <= writerOptions.getFlushQueueLength(); i++) {
             flushQueue.put(new StarRocksFlushTuple("", 0l, null));
         }
+        checkFlushException();
     }
 
     private void asyncFlush() throws Exception {
@@ -116,12 +153,14 @@ private void asyncFlush() throws Exception {
         if (Strings.isNullOrEmpty(flushData.getLabel())) {
             return;
         }
+        stopScheduler();
         LOG.debug(String.format("Async stream load: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel()));
         for (int i = 0; i <= writerOptions.getMaxRetries(); i++) {
             try {
                 // flush to StarRocks with stream load
                 starrocksStreamLoadVisitor.doStreamLoad(flushData);
                 LOG.info(String.format("Async stream load finished: label[%s].", flushData.getLabel()));
+                startScheduler();
                 break;
             } catch (Exception e) {
                 LOG.warn("Failed to flush batch data to StarRocks, retry times = {}", i, e);
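
The interval flush added above is driven entirely by job configuration. A minimal sketch of the relevant writer keys in a DataX `job.json` — the key names come from `StarRocksWriterOptions` in this patch, and the values shown are simply its built-in defaults (500000 rows, 90 MB, 300000 ms), so treat them as illustrative rather than recommended:

```json
{
    "writer": {
        "name": "starrockswriter",
        "parameter": {
            "maxBatchRows": 500000,
            "maxBatchSize": 94371840,
            "flushInterval": 300000
        }
    }
}
```

A flush is triggered by whichever threshold is reached first; the scheduled flush mainly bounds how long a trickle of rows can sit in the buffer.
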
From 4450ccadd88a92e66c3f3e763ebc729038690607 Mon Sep 17 00:00:00 2001
From: hffariel
Date: Sat, 9 Oct 2021 18:38:33 +0800
Subject: [PATCH 36/50] add the conversion of `bytes` columns

---
 .../starrockswriter/StarRocksWriterOptions.java      |  1 -
 .../starrockswriter/row/StarRocksBaseSerializer.java | 12 ++++++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
index be3fa3ecb8..8d5ede9831 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
@@ -158,7 +158,6 @@ private void validateStreamLoadUrl() {
     private void validateRequired() {
         final String[] requiredOptionKeys = new String[]{
             KEY_USERNAME,
-            KEY_PASSWORD,
             KEY_DATABASE,
             KEY_TABLE,
             KEY_COLUMN,

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java
index 77d25f1235..a7ad499d55 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java
@@ -6,13 +6,21 @@ public class StarRocksBaseSerializer {
 
     protected String fieldConvertion(Column col) {
-        if (null == col.getRawData()) {
+        if (null == col.getRawData() || Type.NULL == col.getType()) {
             return null;
         }
         if (Type.BOOL == col.getType()) {
             return String.valueOf(col.asLong());
         }
+        if (Type.BYTES == col.getType()) {
+            byte[] bts = (byte[])col.getRawData();
+            long value = 0;
+            for (int i = 0; i < bts.length; i++) {
+                value += (bts[bts.length - i - 1] & 0xffL) << (8 * i);
+            }
+            return String.valueOf(value);
+        }
         return col.asString();
     }
-}
+}
\ No newline at end of file

From a3aa8e31eb72f19c15f8f2e419a19eadbc4c73af Mon Sep 17 00:00:00 2001
From: hffariel
Date: Thu, 14 Oct 2021 21:48:44 +0800
Subject: [PATCH 37/50] optimize columns with backquotes

---
 .../starrockswriter/manager/StarRocksStreamLoadVisitor.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
index 656f402096..847635c828 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
@@ -26,6 +26,7 @@
 
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 public class StarRocksStreamLoadVisitor {
 
@@ -137,7 +138,7 @@ protected boolean isRedirectable(String method) {
             HttpPut httpPut = new HttpPut(loadUrl);
             List<String> cols = writerOptions.getColumns();
             if (null != cols && !cols.isEmpty()) {
-                httpPut.setHeader("columns", String.join(",", cols));
+                httpPut.setHeader("columns", String.join(",", cols.stream().map(f -> String.format("`%s`", f)).collect(Collectors.toList())));
             }
             if (null != writerOptions.getLoadProps()) {
                 for (Map.Entry<String, Object> entry : writerOptions.getLoadProps().entrySet()) {

From 46f37648012855085db82dbc3f7d9c6a09478b8c Mon Sep 17 00:00:00 2001
From: hffariel
Date: Fri, 15 Oct 2021 11:32:07 +0800
Subject: [PATCH 38/50] modify release version

---
 starrockswriter/pom.xml                       | 2 +-
 starrockswriter/src/main/assembly/package.xml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/starrockswriter/pom.xml b/starrockswriter/pom.xml
index 9398b9ff19..fcfeb5b3a5 100755
--- a/starrockswriter/pom.xml
+++ b/starrockswriter/pom.xml
@@ -8,7 +8,7 @@
     <artifactId>starrockswriter</artifactId>
     <name>starrockswriter</name>
-    <version>1.1.0</version>
+    <version>release</version>
     <packaging>jar</packaging>

diff --git a/starrockswriter/src/main/assembly/package.xml b/starrockswriter/src/main/assembly/package.xml
index c63845b471..afb5cd5b9a 100755
--- a/starrockswriter/src/main/assembly/package.xml
+++ b/starrockswriter/src/main/assembly/package.xml
@@ -19,7 +19,7 @@
         <directory>target/</directory>
         <includes>
-            <include>starrockswriter-1.1.0.jar</include>
+            <include>starrockswriter-release.jar</include>
         </includes>
         <outputDirectory>plugin/writer/starrockswriter</outputDirectory>

From bee91f54017a8970441643a358b0a27838cbb0e4 Mon Sep 17 00:00:00 2001
From: hffariel
Date: Fri, 15 Oct 2021 11:40:41 +0800
Subject: [PATCH 39/50] clean the target directory before building

---
 .github/workflows/release.yml | 1 +
 build.sh                      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 27435856b6..8db502708a 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -20,6 +20,7 @@ jobs:
           fetch-depth: 0
       - run: |
           git checkout main
+          rm -rf target
           mvn -U -pl starrockswriter -am clean package assembly:assembly -Dmaven.test.skip=true
           cd target/datax/datax/plugin/writer/
           tar -czvf starrockswriter.tar.gz starrockswriter

diff --git a/build.sh b/build.sh
index 609aa2bb12..d8574e17d6 100755
--- a/build.sh
+++ b/build.sh
@@ -1,5 +1,6 @@
 ShellDir="$( cd "$( dirname "$0" )" && pwd )"
 cd $ShellDir
+rm -rf target
 mvn -U -pl starrockswriter -am clean package assembly:assembly -Dmaven.test.skip=true
 rm -f starrockswriter.tar.gz
 cd target/datax/datax/plugin/writer/

From fa4505775df1b03169ed024a5091e4777bc149d5 Mon Sep 17 00:00:00 2001
From: hffariel
Date: Fri, 15 Oct 2021 13:09:05 +0800
Subject: [PATCH 40/50] fix release script

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 8db502708a..f26d8e32fc 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -19,7 +19,7 @@ jobs:
         with:
           fetch-depth: 0
       - run: |
-          git checkout main
+          git checkout main && git pull origin main
           rm -rf target
           mvn -U -pl starrockswriter -am clean package assembly:assembly -Dmaven.test.skip=true
           cd target/datax/datax/plugin/writer/

From 1e574298834c308bd064076a7beaeffc94c4cccf Mon Sep 17 00:00:00 2001
From: hffariel
Date: Tue, 7 Dec 2021 11:48:51 +0800
Subject: [PATCH 41/50] don't set the columns header with json format

---
 .../starrockswriter/manager/StarRocksStreamLoadVisitor.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
index 847635c828..38f666808e 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
@@ -137,7 +137,7 @@ protected boolean isRedirectable(String method) {
         try (CloseableHttpClient httpclient = httpClientBuilder.build()) {
             HttpPut httpPut = new HttpPut(loadUrl);
             List<String> cols = writerOptions.getColumns();
-            if (null != cols && !cols.isEmpty()) {
+            if (null != cols && !cols.isEmpty() && StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) {
                 httpPut.setHeader("columns", String.join(",", cols.stream().map(f -> String.format("`%s`", f)).collect(Collectors.toList())));
             }
             if (null != writerOptions.getLoadProps()) {

From 83242f170571ea81f63a3a54f4337aed1a08bc53 Mon Sep 17 00:00:00 2001
From: hffariel
Date: Mon, 13 Dec 2021 15:27:17 +0800
Subject: [PATCH 42/50] fix the host iteration

---
 .../manager/StarRocksStreamLoadVisitor.java | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
index 38f666808e..2a3722632b 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
@@ -34,7 +34,7 @@ public class StarRocksStreamLoadVisitor {
     private static final Logger LOG = LoggerFactory.getLogger(StarRocksStreamLoadVisitor.class);
 
     private final StarRocksWriterOptions writerOptions;
-    private int pos;
+    private long pos;
 
     public StarRocksStreamLoadVisitor(StarRocksWriterOptions writerOptions) {
         this.writerOptions = writerOptions;
@@ -68,11 +68,9 @@ public void doStreamLoad(StarRocksFlushTuple flushData) throws IOException {
 
     private String getAvailableHost() {
         List<String> hostList = writerOptions.getLoadUrlList();
-        if (pos >= hostList.size()) {
-            pos = 0;
-        }
-        for (; pos < hostList.size(); pos++) {
-            String host = new StringBuilder("http://").append(hostList.get(pos)).toString();
+        long tmp = pos + hostList.size();
+        for (; pos < tmp; pos++) {
+            String host = new StringBuilder("http://").append(hostList.get((int) (pos % hostList.size()))).toString();
             if (tryHttpConnection(host)) {
                 return host;
             }

From f5cedbeb3269fb7560cceda16e049863a50bf6af Mon Sep 17 00:00:00 2001
From: hffariel
Date: Mon, 24 Jan 2022 12:18:14 +0800
Subject: [PATCH 43/50] check the label state when `label already exists`

---
 .../StarRocksWriterOptions.java             |  2 +-
 .../manager/StarRocksFlushTuple.java        |  1 +
 .../StarRocksStreamLoadFailedException.java | 33 +++++++
 .../manager/StarRocksStreamLoadVisitor.java | 86 ++++++++++++++++---
 .../manager/StarRocksWriterManager.java     |  9 +-
 5 files changed, 118 insertions(+), 13 deletions(-)
 create mode 100644 starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadFailedException.java

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
index 8d5ede9831..689c09e15d 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
@@ -15,7 +15,7 @@ public class StarRocksWriterOptions implements Serializable {
     private static final long serialVersionUID = 1l;
     private static final long KILO_BYTES_SCALE = 1024l;
     private static final long MEGA_BYTES_SCALE = KILO_BYTES_SCALE * KILO_BYTES_SCALE;
-    private static final int MAX_RETRIES = 1;
+    private static final int MAX_RETRIES = 3;
     private static final int BATCH_ROWS = 500000;
     private static final long BATCH_BYTES = 90 * MEGA_BYTES_SCALE;

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java
index 47cebb912d..5c939f9b08 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java
@@ -15,6 +15,7 @@ public StarRocksFlushTuple(String label, Long bytes, List<byte[]> rows) {
     }
 
     public String getLabel() { return label; }
+    public void setLabel(String label) { this.label = label; }
     public Long getBytes() { return bytes; }
     public List<byte[]> getRows() { return rows; }
 }
\ No newline at end of file

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadFailedException.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadFailedException.java
new file mode 100644
index 0000000000..4eb4704859
--- /dev/null
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadFailedException.java
@@ -0,0 +1,33 @@
+package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager;
+
+import java.io.IOException;
+import java.util.Map;
+
+
+public class StarRocksStreamLoadFailedException extends IOException {
+
+    static final long serialVersionUID = 1L;
+
+    private final Map<String, Object> response;
+    private boolean reCreateLabel;
+
+    public StarRocksStreamLoadFailedException(String message, Map<String, Object> response) {
+        super(message);
+        this.response = response;
+    }
+
+    public StarRocksStreamLoadFailedException(String message, Map<String, Object> response, boolean reCreateLabel) {
+        super(message);
+        this.response = response;
+        this.reCreateLabel = reCreateLabel;
+    }
+
+    public Map<String, Object> getFailedResponse() {
+        return response;
+    }
+
+    public boolean needReCreateLabel() {
+        return reCreateLabel;
+    }
+
+}

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
index 2a3722632b..b8245d71dd 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
@@ -14,6 +14,7 @@
 import org.apache.http.HttpEntity;
 import org.apache.http.client.config.RequestConfig;
 import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.methods.HttpPut;
 import org.apache.http.entity.ByteArrayEntity;
 import org.apache.http.impl.client.CloseableHttpClient;
@@ -26,6 +27,7 @@
 
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
@@ -35,6 +37,13 @@ public class StarRocksStreamLoadVisitor {
 
     private final StarRocksWriterOptions writerOptions;
     private long pos;
+    private static final String RESULT_FAILED = "Fail";
+    private static final String RESULT_LABEL_EXISTED = "Label Already Exists";
+    private static final String LAEBL_STATE_VISIBLE = "VISIBLE";
+    private static final String LAEBL_STATE_COMMITTED = "COMMITTED";
+    private static final String RESULT_LABEL_PREPARE = "PREPARE";
+    private static final String RESULT_LABEL_ABORTED = "ABORTED";
+    private static final String RESULT_LABEL_UNKNOWN = "UNKNOWN";
 
     public StarRocksStreamLoadVisitor(StarRocksWriterOptions writerOptions) {
         this.writerOptions = writerOptions;
@@ -59,10 +68,14 @@ public void doStreamLoad(StarRocksFlushTuple flushData) throws IOException {
             throw new IOException("Unable to flush data to StarRocks: unknown result status.");
         }
         LOG.debug(new StringBuilder("StreamLoad response:\n").append(JSON.toJSONString(loadResult)).toString());
-        if (loadResult.get(keyStatus).equals("Fail")) {
+        if (RESULT_FAILED.equals(loadResult.get(keyStatus))) {
             throw new IOException(
StringBuilder("Failed to flush data to StarRocks.\n").append(JSON.toJSONString(loadResult)).toString() ); + } else if (RESULT_LABEL_EXISTED.equals(loadResult.get(keyStatus))) { + LOG.debug(new StringBuilder("StreamLoad response:\n").append(JSON.toJSONString(loadResult)).toString()); + // has to block-checking the state to get the final result + checkLabelState(host, flushData.getLabel()); } } @@ -122,6 +135,52 @@ private byte[] joinRows(List rows, int totalBytes) { throw new RuntimeException("Failed to join rows data, unsupported `format` from stream load properties:"); } + @SuppressWarnings("unchecked") + private void checkLabelState(String host, String label) throws IOException { + int idx = 0; + while(true) { + try { + TimeUnit.SECONDS.sleep(Math.min(++idx, 5)); + } catch (InterruptedException ex) { + break; + } + try (CloseableHttpClient httpclient = HttpClients.createDefault()) { + HttpGet httpGet = new HttpGet(new StringBuilder(host).append("/api/").append(writerOptions.getDatabase()).append("/get_load_state?label=").append(label).toString()); + httpGet.setHeader("Authorization", getBasicAuthHeader(writerOptions.getUsername(), writerOptions.getPassword())); + httpGet.setHeader("Connection", "close"); + + try (CloseableHttpResponse resp = httpclient.execute(httpGet)) { + HttpEntity respEntity = getHttpEntity(resp); + if (respEntity == null) { + throw new IOException(String.format("Failed to flush data to StarRocks, Error " + + "could not get the final state of label[%s].\n", label), null); + } + Map result = (Map)JSON.parse(EntityUtils.toString(respEntity)); + String labelState = (String)result.get("state"); + if (null == labelState) { + throw new IOException(String.format("Failed to flush data to StarRocks, Error " + + "could not get the final state of label[%s]. 
                            "could not get the final state of label[%s]. response[%s]\n", label, EntityUtils.toString(respEntity)), null);
+                    }
+                    LOG.info(String.format("Checking label[%s] state[%s]\n", label, labelState));
+                    switch(labelState) {
+                        case LAEBL_STATE_VISIBLE:
+                        case LAEBL_STATE_COMMITTED:
+                            return;
+                        case RESULT_LABEL_PREPARE:
+                            continue;
+                        case RESULT_LABEL_ABORTED:
+                            throw new StarRocksStreamLoadFailedException(String.format("Failed to flush data to StarRocks, Error " +
+                                "label[%s] state[%s]\n", label, labelState), null, true);
+                        case RESULT_LABEL_UNKNOWN:
+                        default:
+                            throw new IOException(String.format("Failed to flush data to StarRocks, Error " +
+                                "label[%s] state[%s]\n", label, labelState), null);
+                    }
+                }
+            }
+        }
+    }
+
     @SuppressWarnings("unchecked")
     private Map<String, Object> doHttpPut(String loadUrl, String label, byte[] data) throws IOException {
         LOG.info(String.format("Executing stream load to: '%s', size: '%s'", loadUrl, data.length));
@@ -150,16 +209,9 @@ protected boolean isRedirectable(String method) {
         httpPut.setEntity(new ByteArrayEntity(data));
         httpPut.setConfig(RequestConfig.custom().setRedirectsEnabled(true).build());
         try (CloseableHttpResponse resp = httpclient.execute(httpPut)) {
-            int code = resp.getStatusLine().getStatusCode();
-            if (200 != code) {
-                LOG.warn("Request failed with code:{}", code);
+            HttpEntity respEntity = getHttpEntity(resp);
+            if (respEntity == null)
                 return null;
-            }
-            HttpEntity respEntity = resp.getEntity();
-            if (null == respEntity) {
-                LOG.warn("Request failed with empty response.");
-                return null;
-            }
             return (Map<String, Object>)JSON.parse(EntityUtils.toString(respEntity));
         }
     }
@@ -171,4 +223,18 @@ private String getBasicAuthHeader(String username, String password) {
         return new StringBuilder("Basic ").append(new String(encodedAuth)).toString();
     }
 
+    private HttpEntity getHttpEntity(CloseableHttpResponse resp) {
+        int code = resp.getStatusLine().getStatusCode();
+        if (200 != code) {
+            LOG.warn("Request failed with code:{}", code);
+            return null;
+        }
+        HttpEntity respEntity = resp.getEntity();
+        if (null == respEntity) {
+            LOG.warn("Request failed with empty response.");
+            return null;
+        }
+        return respEntity;
+    }
+
 }

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
index f32eccdefe..d7c290df82 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
@@ -134,7 +134,7 @@ public void run() {
                         flushException = e;
                     }
                 }
-            }
+            }
         });
         flushThread.setDaemon(true);
         flushThread.start();
@@ -167,8 +167,13 @@ private void asyncFlush() throws Exception {
                 if (i >= writerOptions.getMaxRetries()) {
                     throw new IOException(e);
                 }
+                if (e instanceof StarRocksStreamLoadFailedException && ((StarRocksStreamLoadFailedException)e).needReCreateLabel()) {
+                    String newLabel = createBatchLabel();
+                    LOG.warn(String.format("Batch label changed from [%s] to [%s]", flushData.getLabel(), newLabel));
+                    flushData.setLabel(newLabel);
+                }
                 try {
-                    Thread.sleep(1000l * (i + 1));
+                    Thread.sleep(1000l * Math.min(i + 1, 10));
                 } catch (InterruptedException ex) {
                     Thread.currentThread().interrupt();
                     throw new IOException("Unable to flush, interrupted while doing another attempt", e);

From a1bf9baa3eccd3d2b3f7fa13aabb93f25a0fccee Mon Sep 17 00:00:00 2001
From: hffariel
Date: Mon, 14 Feb 2022 18:49:25 +0800
Subject: [PATCH 44/50] add label-prefix option

---
 .../starrockswriter/StarRocksWriterOptions.java |  5 +++++
 .../manager/StarRocksWriterManager.java         | 12 +++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
index 689c09e15d..14ce0332d5 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java
@@ -33,6 +33,7 @@ public enum StreamLoadFormat {
     private static final String KEY_PRE_SQL = "preSql";
     private static final String KEY_POST_SQL = "postSql";
     private static final String KEY_JDBC_URL = "jdbcUrl";
+    private static final String KEY_LABEL_PREFIX = "labelPrefix";
     private static final String KEY_MAX_BATCH_ROWS = "maxBatchRows";
     private static final String KEY_MAX_BATCH_SIZE = "maxBatchSize";
     private static final String KEY_FLUSH_INTERVAL = "flushInterval";
@@ -78,6 +79,10 @@ public String getPassword() {
         return options.getString(KEY_PASSWORD);
     }
 
+    public String getLabelPrefix() {
+        return options.getString(KEY_LABEL_PREFIX);
+    }
+
     public List<String> getLoadUrlList() {
         return options.getList(KEY_LOAD_URL, String.class);
     }

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
index d7c290df82..c28c86478f 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
@@ -24,6 +24,7 @@ public class StarRocksWriterManager {
 
     private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor;
     private final StarRocksWriterOptions writerOptions;
+    private static final String UNDERSCORE = "_";
 
     private final List<byte[]> buffer = new ArrayList<>();
     private int batchCount = 0;
@@ -120,7 +121,16 @@ public synchronized void close() {
     }
 
     public String createBatchLabel() {
-        return UUID.randomUUID().toString();
+        StringBuilder sb = new StringBuilder();
+        if (!Strings.isNullOrEmpty(writerOptions.getLabelPrefix())) {
+            sb.append(writerOptions.getLabelPrefix()).append(UNDERSCORE);
+        }
+        return sb.append(writerOptions.getDatabase())
+            .append(UNDERSCORE)
+            .append(writerOptions.getTable())
+            .append(UNDERSCORE)
+            .append(UUID.randomUUID().toString())
+            .toString();
     }
 
     private void startAsyncFlushing() {

From 0554bd1bea980c1b19bb19a1a32074027eff5f3b Mon Sep 17 00:00:00 2001
From: hffariel
Date: Mon, 14 Feb 2022 19:23:20 +0800
Subject: [PATCH 45/50] change the linking character of stream load label

---
 .../starrockswriter/manager/StarRocksWriterManager.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
index c28c86478f..40acbd7449 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
@@ -24,7 +24,7 @@ public class StarRocksWriterManager {
 
     private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor;
     private final StarRocksWriterOptions writerOptions;
-    private static final String UNDERSCORE = "_";
+    private static final String DOT = ".";
 
     private final List<byte[]> buffer = new ArrayList<>();
     private int batchCount = 0;
@@ -123,12 +123,12 @@ public synchronized void close() {
     public String createBatchLabel() {
         StringBuilder sb = new StringBuilder();
         if (!Strings.isNullOrEmpty(writerOptions.getLabelPrefix())) {
-            sb.append(writerOptions.getLabelPrefix()).append(UNDERSCORE);
+            sb.append(writerOptions.getLabelPrefix()).append(DOT);
         }
         return sb.append(writerOptions.getDatabase())
-            .append(UNDERSCORE)
+            .append(DOT)
             .append(writerOptions.getTable())
-            .append(UNDERSCORE)
+            .append(DOT)
             .append(UUID.randomUUID().toString())
             .toString();
     }

From 721ed5136dd3d71c8de83ba4b40d60b56b5114b5 Mon Sep 17 00:00:00 2001
From: hffariel
Date: Mon, 14 Feb 2022 20:15:14 +0800
Subject: [PATCH 46/50] change the link character of the stream load label

---
 .../starrockswriter/manager/StarRocksWriterManager.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
index 40acbd7449..405b248bc4 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
@@ -24,7 +24,7 @@ public class StarRocksWriterManager {
 
     private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor;
     private final StarRocksWriterOptions writerOptions;
-    private static final String DOT = ".";
+    private static final String HYPHEN = "-";
 
     private final List<byte[]> buffer = new ArrayList<>();
     private int batchCount = 0;
@@ -123,12 +123,12 @@ public synchronized void close() {
     public String createBatchLabel() {
         StringBuilder sb = new StringBuilder();
         if (!Strings.isNullOrEmpty(writerOptions.getLabelPrefix())) {
-            sb.append(writerOptions.getLabelPrefix()).append(DOT);
+            sb.append(writerOptions.getLabelPrefix()).append(HYPHEN);
        }
         return sb.append(writerOptions.getDatabase())
-            .append(DOT)
+            .append(HYPHEN)
             .append(writerOptions.getTable())
-            .append(DOT)
+            .append(HYPHEN)
             .append(UUID.randomUUID().toString())
             .toString();
     }

From d722f302eb6f2df01832493fbf12b17462fc766b Mon Sep 17 00:00:00 2001
From: hffariel
Date: Mon, 14 Feb 2022 20:23:19 +0800
Subject: [PATCH 47/50] remove db and table info from stream load label

---
 .../starrockswriter/manager/StarRocksWriterManager.java | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
index 405b248bc4..7162e413af 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java
@@ -24,7 +24,6 @@ public class StarRocksWriterManager {
 
     private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor;
     private final StarRocksWriterOptions writerOptions;
-    private static final String HYPHEN = "-";
 
     private final List<byte[]> buffer = new ArrayList<>();
     private int batchCount = 0;
@@ -123,13 +122,9 @@ public synchronized void close() {
 
     public String createBatchLabel() {
         StringBuilder sb = new StringBuilder();
         if (!Strings.isNullOrEmpty(writerOptions.getLabelPrefix())) {
-            sb.append(writerOptions.getLabelPrefix()).append(HYPHEN);
+            sb.append(writerOptions.getLabelPrefix());
         }
-        return sb.append(writerOptions.getDatabase())
-            .append(HYPHEN)
-            .append(writerOptions.getTable())
-            .append(HYPHEN)
-            .append(UUID.randomUUID().toString())
+        return sb.append(UUID.randomUUID().toString())
             .toString();
     }

From 676af18f6ffbfed1e52bbf7a04c1076eac0124d3 Mon Sep 17 00:00:00 2001
From: hffariel
Date: Thu, 17 Mar 2022 17:30:48 +0800
Subject: [PATCH 48/50] fix an NPE

---
 README.md                                   | 2 +-
 .../manager/StarRocksStreamLoadVisitor.java | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 0ff5038715..6b9bcc5c1c 100644
--- a/README.md
+++ b/README.md
@@ -5,5 +5,5 @@ This is a repo forked from [DataX](https://github.com/alibaba/DataX), and mainta
 
 ## How to use
 1. Run `./build.sh` to generate the `starrockswriter.tar.gz`, then untar it into your own [DataX release](https://github.com/alibaba/DataX) directory (which will be `datax/plugin/writer/`).
-2. Create a `job.json` to define the reader and writer. For more details about the configuration, please refer to the [Documentation](https://docs.starrocks.com).
+2. Create a `job.json` to define the reader and writer. For more details about the configuration, please refer to the [Documentation](https://docs.starrocks.com/en-us/main/loading/DataX-starrocks-writer).
 3. Run `python datax/bin/datax.py --jvm="-Xms6G -Xmx6G" --loglevel=debug job.json` to start a job.

diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
index b8245d71dd..7d06d2db7b 100644
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java
@@ -25,6 +25,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
@@ -107,7 +108,7 @@ private boolean tryHttpConnection(String host) {
 
     private byte[] joinRows(List<byte[]> rows, int totalBytes) {
         if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) {
-            Map<String, Object> props = writerOptions.getLoadProps();
+            Map<String, Object> props = (writerOptions.getLoadProps() == null ? new HashMap<>() : writerOptions.getLoadProps());
             byte[] lineDelimiter = StarRocksDelimiterParser.parse((String)props.get("row_delimiter"), "\n").getBytes(StandardCharsets.UTF_8);
             ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length);
             for (byte[] row : rows) {
@@ -216,7 +217,7 @@ protected boolean isRedirectable(String method) {
             }
         }
     }
-    
+
     private String getBasicAuthHeader(String username, String password) {
         String auth = username + ":" + password;
         byte[] encodedAuth = Base64.encodeBase64(auth.getBytes(StandardCharsets.UTF_8));

From 03158bf046c12932b0ec98c3adbbbf25e2c3f150 Mon Sep 17 00:00:00 2001
From: dingxiaobo
Date: Fri, 26 Aug 2022 13:25:48 +0800
Subject: [PATCH 49/50] Merge StarRocks plugin

---
 pom.xml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pom.xml b/pom.xml
index 08bb103d9c..cf7a8e6725 100644
--- a/pom.xml
+++ b/pom.xml
@@ -88,8 +88,6 @@
         <module>kingbaseeswriter</module>
         <module>adswriter</module>
         <module>oceanbasev10writer</module>
-        <module>cassandrawriter</module>
-        <module>clickhousewriter</module>
         <module>adbpgwriter</module>
         <module>hologresjdbcwriter</module>
         <module>rdbmswriter</module>
@@ -117,7 +115,7 @@
         <module>oscarwriter</module>
        <module>cassandrawriter</module>
         <module>clickhousewriter</module>
-
+
         <module>plugin-rdbms-util</module>
         <module>plugin-unstructured-storage-util</module>

From b49ceb135f732c4c24442edc6ecc85ec4b4810fd Mon Sep 17 00:00:00 2001
From: dingxiaobo
Date: Fri, 26 Aug 2022 13:45:33 +0800
Subject: [PATCH 50/50] Merge StarRocks plugin

---
 .github/workflows/release.yml | 31 -------
 build.sh                      |  8 --------
 2 files changed, 39 deletions(-)
 delete mode 100644 .github/workflows/release.yml
 delete mode 100755 build.sh

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
deleted file mode 100644
index f26d8e32fc..0000000000
--- a/.github/workflows/release.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-name: Release
-
-on:
-  push:
-    tags:
-      - "v*.*.*"
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Set up Maven Central Repository
-        uses: actions/setup-java@v2
-        with:
-          java-version: '8'
-          distribution: 'adopt'
-      - name: Checkout
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - run: |
-          git checkout main && git pull origin main
-          rm -rf target
-          mvn -U -pl starrockswriter -am clean package assembly:assembly -Dmaven.test.skip=true
-          cd target/datax/datax/plugin/writer/
-          tar -czvf starrockswriter.tar.gz starrockswriter
-      - name: Release
-        uses: softprops/action-gh-release@v1
-        with:
-          files: |
-            target/datax/datax/plugin/writer/starrockswriter.tar.gz

diff --git a/build.sh b/build.sh
deleted file mode 100755
index d8574e17d6..0000000000
--- a/build.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-ShellDir="$( cd "$( dirname "$0" )" && pwd )"
-cd $ShellDir
-rm -rf target
-mvn -U -pl starrockswriter -am clean package assembly:assembly -Dmaven.test.skip=true
-rm -f starrockswriter.tar.gz
-cd target/datax/datax/plugin/writer/
-tar -czvf starrockswriter.tar.gz starrockswriter
-mv starrockswriter.tar.gz $ShellDir
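
The label and format patches above combine as follows: each stream load is labelled `<labelPrefix><uuid>`, an `ABORTED` label is retried under a freshly created label, and (per patch 41) the backquoted `columns` header is sent only for CSV-format loads. A hypothetical writer fragment exercising these keys — `labelPrefix`, `loadProps`, and `format` are taken from the patches above, while the prefix value itself is illustrative:

```json
{
    "writer": {
        "name": "starrockswriter",
        "parameter": {
            "labelPrefix": "datax_",
            "loadProps": {
                "format": "json"
            }
        }
    }
}
```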