diff --git a/core/src/main/job/job.json b/core/src/main/job/job.json index cc35387778..ad5d4a85c5 100755 --- a/core/src/main/job/job.json +++ b/core/src/main/job/job.json @@ -2,11 +2,10 @@ "job": { "setting": { "speed": { - "channel":1 + "channel": 2 }, "errorLimit": { - "record": 0, - "percentage": 0.02 + "record": 0 } }, "content": [ @@ -14,17 +13,17 @@ "reader": { "name": "streamreader", "parameter": { - "column" : [ + "column": [ { "value": "DataX", "type": "string" }, { - "value": 19890604, + "value": 1724154616370, "type": "long" }, { - "value": "1989-06-04 00:00:00", + "value": "2024-01-01 00:00:00", "type": "date" }, { @@ -32,11 +31,11 @@ "type": "bool" }, { - "value": "test", + "value": "TestRawData", "type": "bytes" } ], - "sliceRecordCount": 100000 + "sliceRecordCount": 100 } }, "writer": { @@ -49,4 +48,4 @@ } ] } -} +} \ No newline at end of file diff --git a/doriswriter/doc/doriswriter.md b/doriswriter/doc/doriswriter.md index 58a688b8dc..2070113b4d 100644 --- a/doriswriter/doc/doriswriter.md +++ b/doriswriter/doc/doriswriter.md @@ -36,8 +36,6 @@ DorisWriter 通过Doris原生支持Stream load方式导入数据, DorisWriter "name": "doriswriter", "parameter": { "loadUrl": ["172.16.0.13:8030"], - "loadProps": { - }, "column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"], "username": "root", "password": "xxxxxx", @@ -178,4 +176,4 @@ DorisWriter 通过Doris原生支持Stream load方式导入数据, DorisWriter } ``` -更多信息请参照 Doris 官网:[Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual) \ No newline at end of file +更多信息请参照 Doris 官网:[Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual) diff --git a/elasticsearchwriter/doc/elasticsearchwriter.md b/elasticsearchwriter/doc/elasticsearchwriter.md index 9a22f13c22..3a3315edc3 100644 --- a/elasticsearchwriter/doc/elasticsearchwriter.md +++ b/elasticsearchwriter/doc/elasticsearchwriter.md @@ -167,79 +167,4 @@ * 
dynamic * 描述: 不使用datax的mappings,使用es自己的自动mappings * 必选: 否 - * 默认值: false - - - -## 4 性能报告 - -### 4.1 环境准备 - -* 总数据量 1kw条数据, 每条0.1kb -* 1个shard, 0个replica -* 不加id,这样默认是append_only模式,不检查版本,插入速度会有20%左右的提升 - -#### 4.1.1 输入数据类型(streamreader) - -``` -{"value": "1.1.1.1", "type": "string"}, -{"value": 19890604.0, "type": "double"}, -{"value": 19890604, "type": "long"}, -{"value": 19890604, "type": "long"}, -{"value": "hello world", "type": "string"}, -{"value": "hello world", "type": "string"}, -{"value": "41.12,-71.34", "type": "string"}, -{"value": "2017-05-25", "type": "string"}, -``` - -#### 4.1.2 输出数据类型(eswriter) - -``` -{ "name": "col_ip","type": "ip" }, -{ "name": "col_double","type": "double" }, -{ "name": "col_long","type": "long" }, -{ "name": "col_integer","type": "integer" }, -{ "name": "col_keyword", "type": "keyword" }, -{ "name": "col_text", "type": "text"}, -{ "name": "col_geo_point", "type": "geo_point" }, -{ "name": "col_date", "type": "date"} -``` - -#### 4.1.2 机器参数 - -1. cpu: 32 Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz -2. mem: 128G -3. 
net: 千兆双网卡 - -#### 4.1.3 DataX jvm 参数 - --Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError - -### 4.2 测试报告 - -| 通道数| 批量提交行数| DataX速度(Rec/s)|DataX流量(MB/s)| -|--------|--------| --------|--------| -| 4| 256| 11013| 0.828| -| 4| 1024| 19417| 1.43| -| 4| 4096| 23923| 1.76| -| 4| 8172| 24449| 1.80| -| 8| 256| 21459| 1.58| -| 8| 1024| 37037| 2.72| -| 8| 4096| 45454| 3.34| -| 8| 8172| 45871| 3.37| -| 16| 1024| 67567| 4.96| -| 16| 4096| 78125| 5.74| -| 16| 8172| 77519| 5.69| -| 32| 1024| 94339| 6.93| -| 32| 4096| 96153| 7.06| -| 64| 1024| 91743| 6.74| - -### 4.3 测试总结 - -* 最好的结果是32通道,每次传4096,如果单条数据很大, 请适当减少批量数,防止oom -* 当然这个很容易水平扩展,而且es也是分布式的,多设置几个shard也可以水平扩展 - -## 5 约束限制 - -* 如果导入id,这样数据导入失败也会重试,重新导入也仅仅是覆盖,保证数据一致性 -* 如果不导入id,就是append_only模式,elasticsearch自动生成id,速度会提升20%左右,但数据无法修复,适合日志型数据(对数据精度要求不高的) \ No newline at end of file + * 默认值: false \ No newline at end of file diff --git a/obhbasereader/doc/obhbasereader.md b/obhbasereader/doc/obhbasereader.md new file mode 100644 index 0000000000..675f6ce795 --- /dev/null +++ b/obhbasereader/doc/obhbasereader.md @@ -0,0 +1,178 @@ +OceanBase的table api为应用提供了ObHBase的访问接口,因此,OceanBase的table api的reader与HBase Reader的结构和配置方法类似。 +obhbasereader插件支持sql和hbase api两种读取方式,两种方式存在如下区别: + +1. sql方式可以按照分区或者K值进行数据切片,而hbase api方式的数据切片需要用户手动设置。 +2. sql方式会将从obhbase读取的kqtv形式的数据转换为单一横行,而hbase api则不做行列转换,直接以kqtv形式将数据传递给下游。 +3. sql方式需要配置column属性,hbase api则不需要配置,数据均为固定的kqtv四列。 +4. 
sql方式仅支持获取最新或者最旧版本的数据,而hbase api支持获得多版本数据。 +#### 脚本配置 +```json +{ + "job": { + "setting": { + "speed": { + "channel": 3, + "byte": 104857600 + }, + "errorLimit": { + "record": 10 + } + }, + "content": [ + { + "reader": { + "name": "obhbasereader", + "parameter": { + "username": "username", + "password": "password", + "encoding": "utf8", + "column": [ + { + "name": "f1:column1_1", + "type": "string" + }, + { + "name": "f1:column2_2", + "type": "string" + }, + { + "name": "f1:column1_1", + "type": "string" + }, + { + "name": "f1:column2_2", + "type": "string" + } + ], + "range": [ + { + "startRowkey": "aaa", + "endRowkey": "ccc", + "isBinaryRowkey": false + }, + { + "startRowkey": "eee", + "endRowkey": "zzz", + "isBinaryRowkey": false + } + ], + "mode": "normal", + "readByPartition": "true", + "scanCacheSize": "", + "readerHint": "", + "readBatchSize": "1000", + "connection": [ + { + "table": [ + "htable1", + "htable2" + ], + "jdbcUrl": [ + "||_dsc_ob10_dsc_||集群:租户||_dsc_ob10_dsc_||jdbc:mysql://ip:port/dbName1" + ], + "username": "username", + "password": "password" + }, + { + "table": [ + "htable1", + "htable2" + ], + "jdbcUrl": [ + "jdbc:mysql://ip:port/database" + ] + } + ] + } + }, + "writer": { + "name": "txtfilewriter", + "parameter": { + "path": "/Users/xujing/datax/txtfile", + "charset": "UTF-8", + "fieldDelimiter": ",", + "fileName": "hbase", + "nullFormat": "null", + "writeMode": "truncate" + } + } + } + ] + } +} +``` +##### 参数解释 + +- **connection** + - 描述:配置分库分表的jdbcUrl和分表名。如果一个分库中有多个分表可以用逗号隔开,也可以写成表名[起始序号-截止序号] + - 必须:是 + - 默认值:无 +- **jdbcUrl** + - 描述:连接ob使用的jdbc url,支持如下两种格式: + - jdbc:mysql://obproxyIp:obproxyPort/db + - 此格式下username需要写成三段式格式 + - ||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/db + - 此格式下username仅填写用户名本身,无需三段式写法 + + - 必选:是 + - 默认值:无 +- **table** + - 描述:所选取的需要同步的表。使用JSON的数组描述,因此支持多张表同时抽取。当配置为多张表时,用户自己需保证多张表是同一schema结构,obhbasereader不予检查表是否同一逻辑表。注意,table必须包含在connection配置单元中。 + - 必选:是 + - 默认值:无 +- 
**readByPartition** + - 描述:使用sql方式读取时,配置**仅**按照分区进行切片。 + - 必须:否 + - 默认值:false +- **partitionName** + - 描述:使用sql方式读取时,标识仅读取指定分区名的数据,用户需要保证配置的分区名在表结构中真实存在(要求严格大小写)。 + - 必须:否 + - 默认值:无 +- **readBatchSize** + - 描述:使用sql方式读取时,分页大小。 + - 必须:否 + - 默认值:10w +- **fetchSize** + - 描述:使用sql方式读取时,控制每次读取数据时从结果集中获取的数据行数。 + - 必须:否 + - 默认值:-2147483648 +- **scanCacheSize** + - 描述:使用hbase api读取时,每次rpc从服务器端读取的行数 + - 必须:否 + - 默认值:256 +- **readerHint** + - 描述:obhbasereader使用sql方式读取时使用的hint + - 必须:否 + - 默认值:/*+READ_CONSISTENCY(weak),QUERY_TIMEOUT(86400000000)*/ +- **column** + - 描述:使用sql方式读取数据时,所配置的表中需要同步的列名集合,使用JSON的数组描述字段信息。 + - 支持列裁剪,即列可以挑选部分列进行导出。 +``` +支持列换序,即列可以不按照表schema信息进行导出,同时支持通配符*,在使用之前需仔细核对列信息。 +``` + +- 必选:sql方式读取时必选 + - 默认值:无 +- **range** + - 描述:指定obhbasereader读取的rowkey范围 + - 必须:否 + - 默认值:无 +- **username** + - 描述:访问OceanBase的用户名 + - 必选:是 + - 默认值:无 +- **mode** + - 描述:读取obhbase的模式,normal 模式,即仅读取一个版本的数据。 + - 必选:是 + - 默认值:normal +- **version** + - 描述:读取obhbase的版本,当前支持oldest、latest模式,分别表示读取最旧和最新的数据。 + - 必须:是 + - 默认值:oldest + +一些注意点: +注:如果配置了**partitionName**,则无需再配置readByPartition,即便配置了也会忽略readByPartition选项,而是仅会读取指定分区的数据。 +注:如果配置了**readByPartition**,任务将仅按照分区切分任务,而不会再按照K值进行切分。如果是非分区表,则整张表会被当作一个任务而不会再切分。 + + + diff --git a/obhbasereader/pom.xml b/obhbasereader/pom.xml new file mode 100755 index 0000000000..62afc3444b --- /dev/null +++ b/obhbasereader/pom.xml @@ -0,0 +1,151 @@ + + 4.0.0 + + com.alibaba.datax + datax-all + 0.0.1-SNAPSHOT + + + obhbasereader + com.alibaba.datax + obhbasereader + 0.0.1-SNAPSHOT + + + + com.alibaba.datax + datax-core + ${datax-project-version} + provided + + + com.alibaba.datax + oceanbasev10reader + 0.0.1-SNAPSHOT + + + guava + com.google.guava + + + + + org.apache.zookeeper + zookeeper + 3.3.2 + + + log4j + log4j + + + + + commons-collections + commons-collections + 3.2.1 + + + + + + + + + + com.oceanbase + obkv-hbase-client + 0.1.4.2 + + + guava + com.google.guava + + + + + + com.google.guava + guava + ${guava-version} + + + 
com.alibaba.toolkit.common + toolkit-common-logging + 1.14 + + + org.json + json + 20160810 + + + junit + junit + 4.11 + test + + + org.powermock + powermock-module-junit4 + 1.4.10 + test + + + org.powermock + powermock-api-mockito + 1.4.10 + test + + + org.mockito + mockito-core + 1.8.5 + test + + + + + + + + src/main/java + + **/*.properties + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + + diff --git a/obhbasereader/src/main/assembly/package.xml b/obhbasereader/src/main/assembly/package.xml new file mode 100755 index 0000000000..43da622d5c --- /dev/null +++ b/obhbasereader/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/reader/obhbasereader + + + target/ + + obhbasereader-0.0.1-SNAPSHOT.jar + + plugin/reader/obhbasereader + + + + + + false + plugin/reader/obhbasereader/libs + runtime + + + diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Constant.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Constant.java new file mode 100755 index 0000000000..40dd32d282 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Constant.java @@ -0,0 +1,34 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +import ch.qos.logback.classic.Level; + +public final class Constant { + public static final String ROWKEY_FLAG = "rowkey"; + public static final int DEFAULT_SCAN_CACHE = 256; + public static final int DEFAULT_FETCH_SIZE = Integer.MIN_VALUE; + public static final int DEFAULT_READ_BATCH_SIZE = 100000; + // timeout:24 * 3600 = 86400s + public static final String OB_READ_HINT = "/*+READ_CONSISTENCY(weak),QUERY_TIMEOUT(86400000000)*/"; + public static final String DEFAULT_DATE_FORMAT = 
"yyyy-MM-dd HH:mm:ss"; + public static final String DEFAULT_ENCODING = "UTF-8"; + public static final String DEFAULT_TIMEZONE = "UTC"; + public static final boolean DEFAULT_USE_SQLREADER = true; + public static final boolean DEFAULT_USE_ODPMODE = true; + public static final String OB_TABLE_CLIENT_PROPERTY = "logging.path.com.alipay.oceanbase-table-client"; + public static final String OB_TABLE_HBASE_PROPERTY = "logging.path.com.alipay.oceanbase-table-hbase"; + public static final String OB_TABLE_CLIENT_LOG_LEVEL = "logging.level.oceanbase-table-client"; + public static final String OB_TABLE_HBASE_LOG_LEVEL = "logging.level.oceanbase-table-hbase"; + public static final String OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-client"; + public static final String OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-hbase"; + public static final String OB_HBASE_LOG_PATH = System.getProperty("datax.home") + "/log/"; + public static final String DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL = Level.OFF.toString(); + public static final String DEFAULT_OB_TABLE_HBASE_LOG_LEVEL = Level.OFF.toString(); + public static final String OBMYSQL_KEYWORDS = + 
"CUME_DIST,DENSE_RANK,EMPTY,FIRST_VALUE,GROUPING,GROUPS,INTERSECT,JSON_TABLE,LAG,LAST_VALUE,LATERAL,LEAD,NTH_VALUE,NTILE,OF,OVER,PERCENT_RANK,RANK,RECURSIVE,ROW_NUMBER,SYSTEM,WINDOW,ACCESSIBLE,ACCOUNT,ACTION,ADD,AFTER,AGAINST,AGGREGATE,ALGORITHM,ALL,ALTER,ALWAYS,ANALYSE,AND,ANY,AS,ASC,ASCII,ASENSITIVE,AT,AUTO_INCREMENT,AUTOEXTEND_SIZE,AVG,AVG_ROW_LENGTH,BACKUP,BEFORE,BEGIN,BETWEEN,BIGINT,BINARY,BINLOG,BIT,BLOB,BLOCK,BOOL,BOOLEAN,BOTH,BTREE,BY,BYTE,CACHE,CALL,CASCADE,CASCADED,CASE,CATALOG_NAME,CHAIN,CHANGE,CHANGED,CHANNEL,CHAR,CHARACTER,CHARSET,CHECK,CHECKSUM,CIPHER,CLASS_ORIGIN,CLIENT,CLOSE,COALESCE,CODE,COLLATE,COLLATION,COLUMN,COLUMN_FORMAT,COLUMN_NAME,COLUMNS,COMMENT,COMMIT,COMMITTED,COMPACT,COMPLETION,COMPRESSED,COMPRESSION,CONCURRENT,CONDITION,CONNECTION,CONSISTENT,CONSTRAINT,CONSTRAINT_CATALOG,CONSTRAINT_NAME,CONSTRAINT_SCHEMA,CONTAINS,CONTEXT,CONTINUE,CONVERT,CPU,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR," + + "CURSOR_NAME,DATA,DATABASE,DATABASES,DATAFILE,DATE,DATETIME,DAY,DAY_HOUR,DAY_MICROSECOND,DAY_MINUTE,DAY_SECOND,DEALLOCATE,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_AUTH,DEFINER,DELAY_KEY_WRITE,DELAYED,DELETE,DES_KEY_FILE,DESC,DESCRIBE,DETERMINISTIC,DIAGNOSTICS,DIRECTORY,DISABLE,DISCARD,DISK,DISTINCT,DISTINCTROW,DIV,DO,DOUBLE,DROP,DUAL,DUMPFILE,DUPLICATE,DYNAMIC,EACH,ELSE,ELSEIF,ENABLE,ENCLOSED,ENCRYPTION,END,ENDS,ENGINE,ENGINES,ENUM,ERROR,ERRORS,ESCAPE,ESCAPED,EVENT,EVENTS,EVERY,EXCHANGE,EXECUTE,EXISTS,EXIT,EXPANSION,EXPIRE,EXPLAIN,EXPORT,EXTENDED,EXTENT_SIZE,FAST,FAULTS,FETCH,FIELDS,FILE,FILE_BLOCK_SIZE,FILTER,FIRST,FIXED,FLOAT,FLOAT4,FLOAT8,FLUSH,FOLLOWS,FOR,FORCE,FOREIGN,FORMAT,FOUND,FROM,FULL,FULLTEXT,FUNCTION,GENERAL,GENERATED,GEOMETRY,GEOMETRYCOLLECTION,GET,GET_FORMAT,GLOBAL,GRANT,GRANTS,GROUP,GROUP_REPLICATION,HANDLER,HASH,HAVING,HELP,HIGH_PRIORITY,HOST,HOSTS,HOUR,HOUR_MICROSECOND,HOUR_MINUTE,HOUR_SECOND,IDENTIFIED,IF,IGNORE,IGNORE_SERVER_IDS,IMPORT,IN,INDEX," + + "INDEXES," + 
"INFILE,INITIAL_SIZE,INNER,INOUT,INSENSITIVE,INSERT,INSERT_METHOD,INSTALL,INSTANCE,INT,INT1,INT2,INT3,INT4,INT8,INTEGER,INTERVAL,INTO,INVOKE,INVOKER,IO,IO_AFTER_GTIDS,IO_BEFORE_GTIDS,IO_THREAD,IPC,IS,ISOLATION,ISSUER,ITERATE,JOIN,JSON,KEY,KEY_BLOCK_SIZE,KEYS,KILL,LANGUAGE,LAST,LEADING,LEAVE,LEAVES,LEFT,LESS,LEVEL,LIKE,LIMIT,LINEAR,LINES,LINESTRING,LIST,LOAD,LOCAL,LOCALTIME,LOCALTIMESTAMP,LOCK,LOCKS,LOGFILE,LOGS,LONG,LONGBLOB,LONGTEXT,LOOP,LOW_PRIORITY,MASTER,MASTER_AUTO_POSITION,MASTER_BIND,MASTER_CONNECT_RETRY,MASTER_DELAY,MASTER_HEARTBEAT_PERIOD,MASTER_HOST,MASTER_LOG_FILE,MASTER_LOG_POS,MASTER_PASSWORD,MASTER_PORT,MASTER_RETRY_COUNT,MASTER_SERVER_ID,MASTER_SSL,MASTER_SSL_CA,MASTER_SSL_CAPATH,MASTER_SSL_CERT,MASTER_SSL_CIPHER,MASTER_SSL_CRL,MASTER_SSL_CRLPATH,MASTER_SSL_KEY,MASTER_SSL_VERIFY_SERVER_CERT,MASTER_TLS_VERSION,MASTER_USER,MATCH,MAX_CONNECTIONS_PER_HOUR,MAX_QUERIES_PER_HOUR,MAX_ROWS,MAX_SIZE,MAX_STATEMENT_TIME,MAX_UPDATES_PER_HOUR," + + "MAX_USER_CONNECTIONS," + + "MAXVALUE,MEDIUM,MEDIUMBLOB,MEDIUMINT,MEDIUMTEXT,MEMORY,MERGE,MESSAGE_TEXT,MICROSECOND,MIDDLEINT,MIGRATE,MIN_ROWS,MINUTE,MINUTE_MICROSECOND,MINUTE_SECOND,MOD,MODE,MODIFIES,MODIFY,MONTH,MULTILINESTRING,MULTIPOINT,MULTIPOLYGON,MUTEX,MYSQL_ERRNO,NAME,NAMES,NATIONAL,NATURAL,NCHAR,NDB,NDBCLUSTER,NEVER,NEW,NEXT,NO,NO_WAIT,NO_WRITE_TO_BINLOG,NODEGROUP,NONBLOCKING,NONE,NOT,NUMBER,NUMERIC,NVARCHAR,OFFSET,OLD_PASSWORD,ON,ONE,ONLY,OPEN,OPTIMIZE,OPTIMIZER_COSTS,OPTION,OPTIONALLY,OPTIONS,OR,ORDER,OUT,OUTER,OUTFILE,OWNER,PACK_KEYS,PAGE,PARSE_GCOL_EXPR,PARSER,PARTIAL,PARTITION,PARTITIONING,PARTITIONS,PASSWORD,PHASE,PLUGIN,PLUGIN_DIR,PLUGINS,POINT,POLYGON,PORT,PRECEDES,PRECISION,PREPARE,PRESERVE,PREV,PRIMARY,PRIVILEGES,PROCEDURE,PROCESSLIST,PROFILE,PROFILES,PROXY,PURGE,QUARTER,QUERY,QUICK,RANGE,READ,READ_ONLY,READ_WRITE,READS,REAL,REBUILD,RECOVER,REDO_BUFFER_SIZE,REDOFILE,REDUNDANT,REFERENCES,REGEXP,RELAY,RELAY_LOG_FILE,RELAY_LOG_POS,RELAY_THREAD,RELAYLOG,RELEASE,RELOAD,REMOVE," + + 
"RENAME,REORGANIZE,REPAIR,REPEAT,REPEATABLE,REPLACE,REPLICATE_DO_DB,REPLICATE_DO_TABLE,REPLICATE_IGNORE_DB,REPLICATE_IGNORE_TABLE,REPLICATE_REWRITE_DB,REPLICATE_WILD_DO_TABLE,REPLICATE_WILD_IGNORE_TABLE,REPLICATION,REQUIRE,RESET,RESIGNAL,RESTORE,RESTRICT,RESUME,RETURN,RETURNED_SQLSTATE,RETURNS,REVERSE,REVOKE,RIGHT,RLIKE,ROLLBACK,ROLLUP,ROTATE,ROUTINE,ROW,ROW_COUNT,ROW_FORMAT,ROWS,RTREE,SAVEPOINT,SCHEDULE,SCHEMA,SCHEMA_NAME,SCHEMAS,SECOND,SECOND_MICROSECOND,SECURITY,SELECT,SENSITIVE,SEPARATOR,SERIAL,SERIALIZABLE,SERVER,SESSION,SET,SHARE,SHOW,SHUTDOWN,SIGNAL,SIGNED,SIMPLE,SLAVE,SLOW,SMALLINT,SNAPSHOT,SOCKET,SOME,SONAME,SOUNDS,SOURCE,SPATIAL,SPECIFIC,SQL,SQL_AFTER_GTIDS,SQL_AFTER_MTS_GAPS,SQL_BEFORE_GTIDS,SQL_BIG_RESULT,SQL_BUFFER_RESULT,SQL_CACHE,SQL_CALC_FOUND_ROWS,SQL_NO_CACHE,SQL_SMALL_RESULT,SQL_THREAD,SQL_TSI_DAY,SQL_TSI_HOUR,SQL_TSI_MINUTE,SQL_TSI_MONTH,SQL_TSI_QUARTER,SQL_TSI_SECOND,SQL_TSI_WEEK,SQL_TSI_YEAR,SQLEXCEPTION,SQLSTATE,SQLWARNING,SSL,STACKED," + + "START," + "STARTING,STARTS,STATS_AUTO_RECALC,STATS_PERSISTENT,STATS_SAMPLE_PAGES,STATUS,STOP,STORAGE,STORED,STRAIGHT_JOIN,STRING,SUBCLASS_ORIGIN,SUBJECT,SUBPARTITION,SUBPARTITIONS,SUPER,SUSPEND,SWAPS,SWITCHES,TABLE,TABLE_CHECKSUM,TABLE_NAME,TABLES,TABLESPACE,TEMPORARY,TEMPTABLE,TERMINATED,TEXT,THAN,THEN,TIME,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TINYBLOB,TINYINT,TINYTEXT,TO,TRAILING,TRANSACTION,TRIGGER,TRIGGERS,TRUNCATE,TYPE,TYPES,UNCOMMITTED,UNDEFINED,UNDO,UNDO_BUFFER_SIZE,UNDOFILE,UNICODE,UNINSTALL,UNION,UNIQUE,UNKNOWN,UNLOCK,UNSIGNED,UNTIL,UPDATE,UPGRADE,USAGE,USE,USE_FRM,USER,USER_RESOURCES,USING,UTC_DATE,UTC_TIME,UTC_TIMESTAMP,VALIDATION,VALUE,VALUES,VARBINARY,VARCHAR,VARCHARACTER,VARIABLES,VARYING,VIEW,VIRTUAL,WAIT,WARNINGS,WEEK,WEIGHT_STRING,WHEN,WHERE,WHILE,WITH,WITHOUT,WORK,WRAPPER,WRITE,X509,XA,XID,XML,XOR,YEAR,YEAR_MONTH,ZEROFILL,FALSE,TRUE"; +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HTableManager.java 
b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HTableManager.java new file mode 100755 index 0000000000..c36114fbb8 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HTableManager.java @@ -0,0 +1,19 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +import com.alipay.oceanbase.hbase.OHTable; +import org.apache.hadoop.conf.Configuration; + +import java.io.IOException; + +/** + * Static helpers that open and close {@link OHTable} handles. + */ +public final class HTableManager { + + /** + * Builds an {@link OHTable} for {@code tableName} from the given configuration. + * + * @throws IOException propagated from the {@code OHTable} constructor + */ + public static OHTable createHTable(Configuration config, String tableName) throws IOException { + OHTable table = new OHTable(config, tableName); + return table; + } + + /** + * Closes {@code hTable}; a {@code null} argument is ignored. + * + * @throws IOException propagated from {@code OHTable.close()} + */ + public static void closeHTable(OHTable hTable) throws IOException { + if (hTable == null) { + return; + } + hTable.close(); + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseColumnCell.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseColumnCell.java new file mode 100755 index 0000000000..1f794ae0aa --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseColumnCell.java @@ -0,0 +1,124 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +import com.alibaba.datax.common.base.BaseObject; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Describes one entry of the column configuration of the hbasereader plugin. + */ +public class HbaseColumnCell extends BaseObject { + private ColumnType columnType; + + // columnName has the form family:qualifier + private String columnName; + + private byte[] cf; + private byte[] qualifier; + + // for a constant column, the constant value is kept in columnValue + private String columnValue; + + // true when columnValue is configured (lets users of this class tell whether the field is a constant) + private boolean isConstant; + + // set only for date-typed columns, no default value; e.g. yyyy-MM-dd 
HH:mm:ss + private String dateformat; + + /** + * Builds a cell from the builder. Exactly one of columnName and columnValue must be set; + * a columnName that is not the rowkey column must have the form family:qualifier. + */ + private HbaseColumnCell(Builder builder) { + this.columnType = builder.columnType; + + // at most one of columnName and columnValue may be configured + Validate.isTrue(builder.columnName == null || builder.columnValue == null, "In obhbasereader, column cannot configure both column name and column value. Choose one of them."); + + // at least one of columnName and columnValue must be configured + Validate.isTrue(builder.columnName != null || builder.columnValue != null, "In obhbasereader, column must configure either column name or column value. Neither of them is configured."); + + if (builder.columnName != null) { + this.isConstant = false; + this.columnName = builder.columnName; + + // a columnName that is not the rowkey must be written as family:qualifier + if (!ObHbaseReaderUtil.isRowkeyColumn(this.columnName)) { + + String promptInfo = "In obhbasereader, the column configuration format of column should be: 'family:column'. The column you configured is wrong:" + this.columnName; + String[] cfAndQualifier = this.columnName.split(":"); + Validate.isTrue(cfAndQualifier.length == 2 && StringUtils.isNotBlank(cfAndQualifier[0]) && StringUtils.isNotBlank(cfAndQualifier[1]), promptInfo); + + this.cf = Bytes.toBytes(cfAndQualifier[0].trim()); + this.qualifier = Bytes.toBytes(cfAndQualifier[1].trim()); + } + } else { + this.isConstant = true; + this.columnValue = builder.columnValue; + } + + if (builder.dateformat != null) { + this.dateformat = builder.dateformat; + } + } + + public ColumnType getColumnType() { + return columnType; + } + + public String getColumnName() { + return columnName; + } + + public byte[] getCf() { + return cf; + } + + public byte[] getQualifier() { + return qualifier; + } + + public String getDateformat() { + return dateformat; + } + + public String getColumnValue() { + return columnValue; + } + + public boolean isConstant() { + return isConstant; + } + + // inner builder class + public static class Builder { + private ColumnType columnType; + private String columnName; + private String columnValue; + + 
private String dateformat; + + public Builder(ColumnType columnType) { + this.columnType = columnType; + } + + public Builder columnName(String columnName) { + this.columnName = columnName; + return this; + } + + public Builder columnValue(String columnValue) { + this.columnValue = columnValue; + return this; + } + + public Builder dateformat(String dateformat) { + this.dateformat = dateformat; + return this; + } + + public HbaseColumnCell build() { + return new HbaseColumnCell(this); + } + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseReaderErrorCode.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseReaderErrorCode.java new file mode 100755 index 0000000000..551b19b630 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseReaderErrorCode.java @@ -0,0 +1,36 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +import com.alibaba.datax.common.spi.ErrorCode; + +/** + * Error codes of the obhbasereader plugin; each constant pairs a code string with a description. + */ +public enum HbaseReaderErrorCode implements ErrorCode { + REQUIRED_VALUE("ObHbaseReader-00", "Missing required parameters."), + ILLEGAL_VALUE("ObHbaseReader-01", "Illegal configuration."), + PREPAR_READ_ERROR("ObHbaseReader-02", "Preparing to read ObHBase error."), + SPLIT_ERROR("ObHbaseReader-03", "Splitting ObHBase table error."), + INIT_TABLE_ERROR("ObHbaseReader-04", "Initializing ObHBase extraction table error"), + PARSE_COLUMN_ERROR("ObHbaseReader-05", "Parse column failed."), + READ_ERROR("ObHbaseReader-06", "Read ObHBase error."); + + private final String code; + private final String description; + + private HbaseReaderErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. 
", this.code, this.description); + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Key.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Key.java new file mode 100755 index 0000000000..6415efd098 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Key.java @@ -0,0 +1,103 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +public final class Key { + + public final static String HBASE_CONFIG = "hbaseConfig"; + + /** + * mode may take one of three values: normal, multiVersionFixedColumn or multiVersionDynamicColumn. No default value. + *

+ * normal is used together with column (Map structure) + *

+ * multiVersionFixedColumn is used together with maxVersion, tetradType and column (List structure) + *

+ * multiVersionDynamicColumn is used together with maxVersion, tetradType and columnFamily (List structure) + */ + public final static String MODE = "mode"; + + /** + * Used when mode = multiVersion; gives the number of versions to read. No default value. + * -1 means read all versions + * must not be 0 or 1 + * >1 means read at most that many versions (must not exceed the maximum value of Integer) + */ + public final static String MAX_VERSION = "maxVersion"; + + /** + * In multi-version mode the types of the tetrad (rowkey,column,timestamp,value) must be configured + */ + public final static String TETRAD_TYPE = "tetradType"; + + /** + * Defaults to utf8 + */ + public final static String ENCODING = "encoding"; + + public final static String TABLE = "table"; + + public final static String USERNAME = "username"; + + public final static String OB_SYS_USERNAME = "obSysUser"; + + public final static String CONFIG_URL = "obConfigUrl"; + + public final static String ODP_HOST = "odpHost"; + + public final static String ODP_PORT = "odpPort"; + + public final static String DB_NAME = "dbName"; + + public final static String PASSWORD = "password"; + + public final static String OB_SYS_PASSWORD = "obSysPassword"; + + public final static String COLUMN_FAMILY = "columnFamily"; + + public final static String COLUMN = "column"; + + public final static String START_ROWKEY = "startRowkey"; + + public final static String END_ROWKEY = "endRowkey"; + + public final static String IS_BINARY_ROWKEY = "isBinaryRowkey"; + + public final static String SCAN_CACHE = "scanCache"; + + public final static String RS_URL = "rsUrl"; + + public final static String MAX_ACTIVE_CONNECTION = "maxActiveConnection"; + + public final static int DEFAULT_MAX_ACTIVE_CONNECTION = 2000; + + public final static String TIMEOUT = "timeout"; + + public final static long DEFAULT_TIMEOUT = 30; + + public final static String PARTITION_NAME = "partitionName"; + + public final static String JDBC_URL = "jdbcUrl"; + + public final static String TIMEZONE = "timezone"; + + public final static String FETCH_SIZE = "fetchSize"; + + public final static String READ_BATCH_SIZE = "readBatchSize"; + + public final static String 
SESSION = "session"; + + public final static String READER_HINT = "readerHint"; + + public final static String QUERY_SQL = "querySql"; + + public final static String SAMPLE_PERCENTAGE = "samplePercentage"; + // whether to use a dedicated (special) secret + public final static String USE_SPECIAL_SECRET = "useSpecialSecret"; + + public final static String USE_SQL_READER = "useSqlReader"; + + public final static String USE_ODP_MODE = "useOdpMode"; + + public final static String RANGE = "range"; + + public final static String READ_BY_PARTITION = "readByPartition"; +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_en_US.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_en_US.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_ja_JP.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_CN.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_HK.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_TW.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/ObHbaseReader.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/ObHbaseReader.java new file mode 100755 index 0000000000..15472d6eaf --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/ObHbaseReader.java @@ -0,0 +1,445 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_OB_TABLE_HBASE_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_ODPMODE; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_HBASE_LOG_PATH; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_CLIENT_PROPERTY; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_HBASE_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_HBASE_PROPERTY; +import static org.apache.commons.lang3.StringUtils.EMPTY; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.spi.Reader; +import 
com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.reader.Constant; +import com.alibaba.datax.plugin.rdbms.reader.util.ObVersion; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.TableExpandUtil; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType; +import com.alibaba.datax.plugin.reader.obhbasereader.ext.ServerConnectInfo; +import com.alibaba.datax.plugin.reader.obhbasereader.task.AbstractHbaseTask; +import com.alibaba.datax.plugin.reader.obhbasereader.task.SQLNormalModeReader; +import com.alibaba.datax.plugin.reader.obhbasereader.task.ScanMultiVersionReader; +import com.alibaba.datax.plugin.reader.obhbasereader.task.ScanNormalModeReader; +import com.alibaba.datax.plugin.reader.obhbasereader.util.HbaseSplitUtil; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; +import com.alibaba.datax.plugin.reader.obhbasereader.util.SqlReaderSplitUtil; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils; + +import com.google.common.base.Preconditions; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +/** + * ObHbaseReader 支持分库分表 + * 仅支持ob3.x及以上版本 + */ +public class ObHbaseReader extends Reader { + + public static class Job extends Reader.Job { + static private final String ACCESS_DENIED_ERROR = "Access denied for user"; + private static Logger LOG = 
LoggerFactory.getLogger(ObHbaseReader.class); + private Configuration originalConfig; + + @Override + public void init() { + if (System.getProperty(OB_TABLE_CLIENT_PROPERTY) == null) { + LOG.info(OB_TABLE_CLIENT_PROPERTY + " not set"); + System.setProperty(OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH); + } + if (System.getProperty(OB_TABLE_HBASE_PROPERTY) == null) { + LOG.info(OB_TABLE_HBASE_PROPERTY + " not set"); + System.setProperty(OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH); + } + if (System.getProperty(OB_TABLE_CLIENT_LOG_LEVEL) == null) { + LOG.info(OB_TABLE_CLIENT_LOG_LEVEL + " not set"); + System.setProperty(OB_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL); + } + if (System.getProperty(OB_TABLE_HBASE_LOG_LEVEL) == null) { + LOG.info(OB_TABLE_HBASE_LOG_LEVEL + " not set"); + System.setProperty(OB_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL); + } + if (System.getProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL) == null) { + LOG.info(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL + " not set"); + System.setProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL); + } + if (System.getProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL) == null) { + LOG.info(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL + " not set"); + System.setProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL); + } + + LOG.info("{} is set to {}, {} is set to {}", + OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH, OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH); + this.originalConfig = super.getPluginJobConf(); + ObHbaseReaderUtil.doPretreatment(originalConfig); + List conns = originalConfig.getList(Constant.CONN_MARK, Object.class); + // 逻辑表配置 + Preconditions.checkArgument(CollectionUtils.isNotEmpty(conns), "connection information is empty."); + dealLogicConnAndTable(conns); + if (LOG.isDebugEnabled()) { + LOG.debug("After init(), now originalConfig is:\n{}\n", this.originalConfig); + } + } + + @Override + public void destroy() { + } + + private void 
dealLogicConnAndTable(List conns) { + String unifiedUsername = originalConfig.getString(Key.USERNAME); + String unifiedPassword = originalConfig.getString(Key.PASSWORD); + boolean useSqlReader = originalConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER); + boolean checkSlave = originalConfig.getBool(com.alibaba.datax.plugin.rdbms.reader.Key.CHECK_SLAVE, false); + Set keywords = Arrays.stream(com.alibaba.datax.plugin.reader.obhbasereader.Constant.OBMYSQL_KEYWORDS.split(",")).collect(Collectors.toSet()); + List preSql = originalConfig.getList(com.alibaba.datax.plugin.rdbms.reader.Key.PRE_SQL, String.class); + + int tableNum = 0; + + for (int i = 0, len = conns.size(); i < len; i++) { + Configuration connConf = Configuration.from(conns.get(i).toString()); + String curUsername = connConf.getString(Key.USERNAME, unifiedUsername); + Preconditions.checkArgument(StringUtils.isNotEmpty(curUsername), "username is empty."); + String curPassword = connConf.getString(Key.PASSWORD, unifiedPassword); + + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.USERNAME), curUsername); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.PASSWORD), curPassword); + + List jdbcUrls = connConf.getList(Key.JDBC_URL, new ArrayList<>(), String.class); + String jdbcUrl; + if (useSqlReader) { + // sql模式下,jdbcUrl必须配置,只有使用sql模式的情况才检查地址 + Preconditions.checkArgument(CollectionUtils.isNotEmpty(jdbcUrls), "if using sql mode, jdbcUrl is needed"); + jdbcUrl = DBUtil.chooseJdbcUrlWithoutRetry(DataBaseType.MySql, jdbcUrls, curUsername, curPassword, preSql, checkSlave); + jdbcUrl = DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl); + // 回写到connection[i].jdbcUrl + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.JDBC_URL), jdbcUrl); + LOG.info("Available jdbcUrl:{}.", jdbcUrl); + } else { + jdbcUrl = jdbcUrls.get(0); + jdbcUrl = StringUtils.isNotBlank(jdbcUrl) ? 
DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl) : EMPTY; + checkAndSetHbaseConnConf(jdbcUrl, curUsername, curPassword, connConf, i); + } + + // table 方式 + // 对每一个connection 上配置的table 项进行解析(已对表名称进行了 ` 处理的) + List tables = connConf.getList(Key.TABLE, String.class); + + List expandedTables = TableExpandUtil.expandTableConf(DataBaseType.MySql, tables); + + if (expandedTables.isEmpty()) { + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, "The specified table list is empty."); + } + + for (int ti = 0; ti < expandedTables.size(); ti++) { + String tableName = expandedTables.get(ti); + if (keywords.contains(tableName.toUpperCase())) { + expandedTables.set(ti, "`" + tableName + "`"); + } + } + tableNum += expandedTables.size(); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE), expandedTables); + } + + if (tableNum == 0) { + // 分库分表读,未匹配到可以抽取的表 + LOG.error("sharding rule result is empty."); + throw DataXException.asDataXException("No tables were matched"); + } + originalConfig.set(Constant.TABLE_NUMBER_MARK, tableNum); + } + + /** + * In public cloud, only odp mode can be used. + * In private cloud, both odp mode and ocp mode can be used. + * + * @param jdbcUrl + * @param curUsername + * @param curPassword + * @param connConf + */ + private void checkAndSetHbaseConnConf(String jdbcUrl, String curUsername, String curPassword, Configuration connConf, int curIndex) { + ServerConnectInfo serverConnectInfo = new ServerConnectInfo(jdbcUrl, curUsername, curPassword); + if (!originalConfig.getBool(Key.USE_ODP_MODE, false)) { + // Normally, only need to query at first time + // In ocp mode, dbName, configUrl, sysUser and sysPass are needed. 
+ String sysUser = connConf.getString(Key.OB_SYS_USERNAME, originalConfig.getString(Key.OB_SYS_USERNAME)); + String sysPass = connConf.getString(Key.OB_SYS_PASSWORD, originalConfig.getString(Key.OB_SYS_PASSWORD)); + serverConnectInfo.setSysUser(sysUser); + serverConnectInfo.setSysPass(sysPass); + String configUrl = connConf.getString(Key.CONFIG_URL, originalConfig.getString(Key.CONFIG_URL)); + if (StringUtils.isBlank(configUrl)) { + configUrl = queryRsUrl(serverConnectInfo); + } + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.USERNAME), curUsername); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.OB_SYS_USERNAME), serverConnectInfo.sysUser); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.OB_SYS_PASSWORD), serverConnectInfo.sysPass); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.CONFIG_URL), configUrl); + } else { + // In odp mode, dbName, odp host and odp port are needed. 
+ String odpHost = connConf.getString(Key.ODP_HOST, serverConnectInfo.host); + String odpPort = connConf.getString(Key.ODP_PORT, serverConnectInfo.port); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.ODP_HOST), odpHost); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.ODP_PORT), odpPort); + } + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.DB_NAME), serverConnectInfo.databaseName); + } +
+        /**
+         * Resolve the OceanBase config url (the value of the 'obconfig_url' server parameter).
+         *
+         * A non-blank job-level Key.CONFIG_URL is returned as is. Otherwise the sys user connects to the
+         * "oceanbase" database and runs "show parameters like 'obconfig_url'", making at most 3 attempts
+         * with a growing pause between them. A value obtained this way is written back to originalConfig
+         * under Key.CONFIG_URL.
+         *
+         * @param serverInfo connection info; jdbcUrl, databaseName, password, sysUser, sysPass and the
+         *                   full user name are read from it
+         * @return the non-blank config url
+         * @throws DataXException with REQUIRED_VALUE when no config url is configured and none could be fetched
+         */
+        private String queryRsUrl(ServerConnectInfo serverInfo) {
+            Preconditions.checkArgument(checkVersionAfterV3(serverInfo.jdbcUrl, serverInfo.getFullUserName(), serverInfo.password), "ob before 3.x is not supported.");
+            String configUrl = originalConfig.getString(Key.CONFIG_URL, null);
+            if (StringUtils.isNotBlank(configUrl)) {
+                return configUrl;
+            }
+
+            final int maxAttempts = 3;
+            final String sysJDBCUrl = serverInfo.jdbcUrl.replace(serverInfo.databaseName, "oceanbase");
+            final String sql = "show parameters like 'obconfig_url'";
+            // The attempt counter advances on EVERY pass, including a successful query that returned a
+            // blank value; otherwise such a result would make this loop reconnect forever.
+            for (int retry = 0; retry < maxAttempts && StringUtils.isBlank(configUrl); retry++) {
+                if (retry > 0) {
+                    int sleep = 1 << retry;
+                    try {
+                        TimeUnit.SECONDS.sleep(sleep);
+                    } catch (InterruptedException e) {
+                        // keep the interrupt visible to the caller instead of swallowing it
+                        Thread.currentThread().interrupt();
+                    }
+                    LOG.warn("retry fetch RsUrl the {} times", retry);
+                }
+                Connection conn = null;
+                try {
+                    conn = DBUtil.getConnection(DataBaseType.OceanBase, sysJDBCUrl, serverInfo.sysUser, serverInfo.sysPass);
+                    LOG.info("query param: {}", sql);
+                    // statement and result set are closed here; the connection is closed in finally
+                    try (PreparedStatement stmt = conn.prepareStatement(sql);
+                         ResultSet result = stmt.executeQuery()) {
+                        if (result.next()) {
+                            configUrl = result.getString("Value");
+                        }
+                    }
+                } catch (Exception e) {
+                    LOG.warn("fetch root server list(rsList) error {}", e.getMessage());
+                } finally {
+                    DBUtil.closeDBResources(null, conn);
+                }
+            }
+
+            if (StringUtils.isBlank(configUrl)) {
+                LOG.error("Fail to get configure url after {} attempts", maxAttempts);
+                throw DataXException.asDataXException(HbaseReaderErrorCode.REQUIRED_VALUE, "未配置obConfigUrl,且无法获取obConfigUrl");
+            }
+            LOG.info("configure url is: " + configUrl);
+            originalConfig.set(Key.CONFIG_URL, configUrl);
+            return configUrl;
+        }
+ + @Override + public void prepare() { + } + + @Override + public void post() { + } + + @Override + public List split(int adviceNumber) { + Map hbaseColumnCells = ObHbaseReaderUtil.parseColumn(originalConfig.getList(Key.COLUMN, Map.class)); + if (hbaseColumnCells.size() == 0) { + LOG.error("no column cells specified."); + throw new RuntimeException("no column cells specified"); + } + String columnFamily = ObHbaseReaderUtil.parseColumnFamily(hbaseColumnCells.values()); + Preconditions.checkArgument(StringUtils.isNotEmpty(columnFamily), "column family is empty."); + List conns = originalConfig.getList(Constant.CONN_MARK, Object.class); + Preconditions.checkArgument(conns != null && !conns.isEmpty(), "connection information is necessary."); + return splitLogicTables(adviceNumber, conns, columnFamily); + } + + private List splitLogicTables(int adviceNumber, List conns, String columnFamily) { +
// adviceNumber这里是channel数量大小, 即datax并发task数量 + // eachTableShouldSplittedNumber是单表应该切分的份数 + int eachTableShouldSplittedNumber = (int) Math.ceil(1.0 * adviceNumber / originalConfig.getInt(Constant.TABLE_NUMBER_MARK)); + boolean useSqlReader = originalConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER); + boolean odpMode = originalConfig.getBool(Key.USE_ODP_MODE, DEFAULT_USE_ODPMODE); + boolean readByPartition = originalConfig.getBool(Key.READ_BY_PARTITION, false); + List splittedConfigs = new ArrayList<>(); + + for (int i = 0, len = conns.size(); i < len; i++) { + Configuration sliceConfig = originalConfig.clone(); + Configuration connConf = Configuration.from(conns.get(i).toString()); + copyConnConfByMode(useSqlReader, odpMode, sliceConfig, connConf); + // 说明是配置的 table 方式 + // 已在之前进行了扩展和`处理,可以直接使用 + List tables = connConf.getList(Key.TABLE, String.class); + Validate.isTrue(null != tables && !tables.isEmpty(), "error in your configuration for the reading database table."); + int tempEachTableShouldSplittedNumber = eachTableShouldSplittedNumber; + if (tables.size() == 1) { + Integer splitFactor = originalConfig.getInt(com.alibaba.datax.plugin.rdbms.reader.Key.SPLIT_FACTOR, Constant.SPLIT_FACTOR); + tempEachTableShouldSplittedNumber = eachTableShouldSplittedNumber * splitFactor; + } + for (String table : tables) { + Configuration tempSlice; + tempSlice = sliceConfig.clone(); + tempSlice.set(Key.TABLE, table); + splittedConfigs.addAll( + useSqlReader ? 
SqlReaderSplitUtil.splitSingleTable(tempSlice, table, columnFamily, tempEachTableShouldSplittedNumber, readByPartition) : HbaseSplitUtil.split(tempSlice)); + } + } + return splittedConfigs; + } + + private void copyConnConfByMode(boolean useSqlReader, boolean odpMode, Configuration targetConf, Configuration sourceConnConf) { + String username = sourceConnConf.getNecessaryValue(Key.USERNAME, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.USERNAME, username); + String password = sourceConnConf.getNecessaryValue(Key.PASSWORD, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.PASSWORD, password); + + if (useSqlReader) { + String jdbcUrl = sourceConnConf.getNecessaryValue(Key.JDBC_URL, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.JDBC_URL, jdbcUrl); + } else if (odpMode) { + String dbName = sourceConnConf.getNecessaryValue(Key.DB_NAME, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.DB_NAME, dbName); + String odpHost = sourceConnConf.getNecessaryValue(Key.ODP_HOST, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.ODP_HOST, odpHost); + String odpPort = sourceConnConf.getNecessaryValue(Key.ODP_PORT, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.ODP_PORT, odpPort); + } else { + String dbName = sourceConnConf.getNecessaryValue(Key.DB_NAME, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.DB_NAME, dbName); + String sysUser = sourceConnConf.getNecessaryValue(Key.OB_SYS_USERNAME, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.OB_SYS_USERNAME, sysUser); + String sysPass = sourceConnConf.getString(Key.OB_SYS_PASSWORD); + targetConf.set(Key.OB_SYS_PASSWORD, sysPass); + } + targetConf.remove(Constant.CONN_MARK); + } +
+        /**
+         * Check the version of the OceanBase server reachable through jdbcUrl.
+         *
+         * A failed attempt is followed by a one-second pause and another attempt, up to 3 retries after
+         * the first try. A failure whose message contains "Access denied for user" is not retried.
+         *
+         * @param jdbcUrl  url passed to DBUtil.getConnectionWithoutRetry
+         * @param username user name used for the connection
+         * @param password password used for the connection
+         * @return true when ObVersion.V3.compareTo(serverVersion) <= 0; false otherwise, and also when
+         *         every attempt failed or the waiting thread was interrupted
+         * @throws RuntimeException wrapping the original exception on an access-denied failure
+         */
+        private boolean checkVersionAfterV3(String jdbcUrl, String username, String password) {
+            int retryLimit = 3;
+            int retryCount = 0;
+            while (retryCount++ <= retryLimit) {
+                // scoped per attempt so that finally never re-closes a connection from an earlier pass
+                Connection conn = null;
+                try {
+                    conn = DBUtil.getConnectionWithoutRetry(DataBaseType.MySql, jdbcUrl, username, password);
+                    ObVersion obVersion = ObReaderUtils.getObVersion(conn);
+                    return ObVersion.V3.compareTo(obVersion) <= 0;
+                } catch (Exception e) {
+                    // getMessage() may be null; guard it so the real failure is not replaced by an NPE
+                    String message = e.getMessage();
+                    LOG.error("fail to check ob version, will retry: " + message);
+                    if (message != null && message.contains(ACCESS_DENIED_ERROR)) {
+                        throw new RuntimeException(e);
+                    }
+                    try {
+                        TimeUnit.SECONDS.sleep(1);
+                    } catch (InterruptedException ex) {
+                        LOG.error("interrupted while waiting for retry.");
+                        // restore the flag and stop retrying; the caller sees "false"
+                        Thread.currentThread().interrupt();
+                        break;
+                    }
+                } finally {
+                    DBUtil.closeDBResources(null, conn);
+                }
+            }
+            return false;
+        }
+ } + + public static class Task extends Reader.Task { + private static Logger LOG = LoggerFactory.getLogger(Task.class); + private Configuration taskConfig; + private AbstractHbaseTask hbaseTaskProxy; + + @Override + public void init() { + this.taskConfig = super.getPluginJobConf(); + + String mode = this.taskConfig.getString(Key.MODE); + ModeType modeType = ModeType.getByTypeName(mode); + boolean useSqlReader = this.taskConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER); + LOG.info("init reader with mode: " + modeType); + + switch (modeType) { + case Normal: + this.hbaseTaskProxy = useSqlReader ? 
new SQLNormalModeReader(this.taskConfig) : new ScanNormalModeReader(this.taskConfig); + break; + case MultiVersionFixedColumn: + this.hbaseTaskProxy = new ScanMultiVersionReader(this.taskConfig); + break; + default: + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "This type of mode is not supported by hbasereader:" + modeType); + } + } + + @Override + public void destroy() { + if (this.hbaseTaskProxy != null) { + try { + this.hbaseTaskProxy.close(); + } catch (Exception e) { + // + } + } + } + + @Override + public void prepare() { + try { + this.hbaseTaskProxy.prepare(); + } catch (Exception e) { + throw DataXException.asDataXException(HbaseReaderErrorCode.PREPAR_READ_ERROR, e); + } + } + + @Override + public void post() { + super.post(); + } + + @Override + public void startRead(RecordSender recordSender) { + Record record = recordSender.createRecord(); + boolean fetchOK; + int retryTimes = 0; + int maxRetryTimes = 3; + while (true) { + try { + // TODO check exception + fetchOK = this.hbaseTaskProxy.fetchLine(record); + } catch (Exception e) { + LOG.info("fetch record failed. 
reason: {}.", e.getMessage(), e); + super.getTaskPluginCollector().collectDirtyRecord(record, e); + if (retryTimes++ > maxRetryTimes) { + throw DataXException.asDataXException(HbaseReaderErrorCode.READ_ERROR, "read from obhbase failed", e); + } + record = recordSender.createRecord(); + continue; + } + if (fetchOK) { + recordSender.sendToWriter(record); + record = recordSender.createRecord(); + } else { + break; + } + } + recordSender.flush(); + } + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ColumnType.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ColumnType.java new file mode 100755 index 0000000000..ca4d73a73f --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ColumnType.java @@ -0,0 +1,44 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.enums; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; + +import java.util.Arrays; + +/** + * 只对 normal 模式读取时有用,多版本读取时,不存在列类型的 + */ +public enum ColumnType { + STRING("string"), + BINARY_STRING("binarystring"), + BYTES("bytes"), + BOOLEAN("boolean"), + SHORT("short"), + INT("int"), + LONG("long"), + FLOAT("float"), + DOUBLE("double"), + DATE("date"); + + private String typeName; + + ColumnType(String typeName) { + this.typeName = typeName; + } + + public static ColumnType getByTypeName(String typeName) { + for (ColumnType columnType : values()) { + if (columnType.typeName.equalsIgnoreCase(typeName)) { + return columnType; + } + } + + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, + String.format("The type %s is not supported by hbasereader, currently supported type is:%s .", typeName, Arrays.asList(values()))); + } + + @Override + public String toString() { + return this.typeName; + } +} diff --git 
a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/FetchVersion.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/FetchVersion.java new file mode 100644 index 0000000000..2bf273c8d1 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/FetchVersion.java @@ -0,0 +1,28 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.enums; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; + +import java.util.Arrays; +import java.util.Optional; +import java.util.stream.Stream; + +public enum FetchVersion { + + OLDEST("oldest"), LATEST("latest"); + + private final String version; + + FetchVersion(String version) { + this.version = version; + } + + public static FetchVersion getByDesc(String name) { + Optional result = Stream.of(values()).filter(v -> v.version.equalsIgnoreCase(name)) + .findFirst(); + return result.orElseThrow(() -> { + return DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, + String.format("obHBasereader 不支持该类型:%s, 目前支持的类型是:%s", name, Arrays.asList(values()))); + }); + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ModeType.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ModeType.java new file mode 100644 index 0000000000..ccaf879632 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ModeType.java @@ -0,0 +1,30 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.enums; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; + +import java.util.Arrays; + +public enum ModeType { + Normal("normal"), + MultiVersionFixedColumn("multiVersionFixedColumn"), + MultiVersionDynamicColumn("multiVersionDynamicColumn"), + ; + + private 
String mode; + + ModeType(String mode) { + this.mode = mode.toLowerCase(); + } + + public static ModeType getByTypeName(String modeName) { + for (ModeType modeType : values()) { + if (modeType.mode.equalsIgnoreCase(modeName)) { + return modeType; + } + } + + throw DataXException.asDataXException( + HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("The mode type is not supported by hbasereader:%s, and the currently supported mode type is:%s", modeName, Arrays.asList(values()))); + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/ext/ServerConnectInfo.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/ext/ServerConnectInfo.java new file mode 100644 index 0000000000..7dca6f5324 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/ext/ServerConnectInfo.java @@ -0,0 +1,146 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.ext; + +import com.google.common.base.Preconditions; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import static org.apache.commons.lang3.StringUtils.EMPTY; + +public class ServerConnectInfo { + + public String clusterName; + public String tenantName; + // userName doesn't contain tenantName or clusterName + public String userName; + public String password; + public String databaseName; + public String ipPort; + public String jdbcUrl; + public String host; + public String port; + public boolean publicCloud; + public int rpcPort; + public String sysUser; + public String sysPass; + + /** + * + * @param jdbcUrl format is jdbc:oceanbase//ip:port + * @param username format is cluster:tenant:username or username@tenant#cluster or user@tenant or user + * @param password + */ + public ServerConnectInfo(final String jdbcUrl, final String username, final String password) { + this(jdbcUrl, username, password, null, null); + } + + public ServerConnectInfo(final String jdbcUrl, final String username, final String password, 
final String sysUser, final String sysPass) { + if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) { + String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN); + Preconditions.checkArgument(ss.length == 3, "jdbc url format is not correct:" + jdbcUrl); + this.userName = username; + this.clusterName = ss[1].trim().split(":")[0]; + this.tenantName = ss[1].trim().split(":")[1]; + this.jdbcUrl = ss[2]; + } else { + this.jdbcUrl = jdbcUrl; + } + this.password = password; + this.sysUser = sysUser; + this.sysPass = sysPass; + parseJdbcUrl(jdbcUrl); + parseFullUserName(username); + } + + private void parseJdbcUrl(final String jdbcUrl) { + Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w-]+)\\?"); + Matcher matcher = pattern.matcher(jdbcUrl); + if (matcher.find()) { + String ipPort = matcher.group(1); + String dbName = matcher.group(2); + this.ipPort = ipPort; + String[] hostPort = ipPort.split(":"); + this.host = hostPort[0]; + this.port = hostPort[1]; + this.databaseName = dbName; + this.publicCloud = host.endsWith("aliyuncs.com"); + } else { + throw new RuntimeException("Invalid argument:" + jdbcUrl); + } + } + + private void parseFullUserName(final String fullUserName) { + int tenantIndex = fullUserName.indexOf("@"); + int clusterIndex = fullUserName.indexOf("#"); + // 适用于jdbcUrl以||_dsc_ob10_dsc_开头的场景 + if (fullUserName.contains(":") && tenantIndex < 0) { + String[] names = fullUserName.split(":"); + if (names.length != 3) { + throw new RuntimeException("invalid argument: " + fullUserName); + } else { + this.clusterName = names[0]; + this.tenantName = names[1]; + this.userName = names[2]; + } + } else if (tenantIndex < 0) { + // 适用于short jdbcUrl,且username中不含租户名(主要是公有云场景,此场景下不计算分区) + this.userName = fullUserName; + this.clusterName = EMPTY; + this.tenantName = EMPTY; + } else { + // 适用于short jdbcUrl,且username中含租户名 + this.userName = fullUserName.substring(0, tenantIndex); + 
if (clusterIndex < 0) { + this.clusterName = EMPTY; + this.tenantName = fullUserName.substring(tenantIndex + 1); + } else { + this.clusterName = fullUserName.substring(clusterIndex + 1); + this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex); + } + } + } +
+    /**
+     * Render the connection info for diagnostics.
+     * The password is always printed masked so that logging this object cannot expose the credential.
+     *
+     * @return a single-line description of the non-secret connection fields
+     */
+    @Override
+    public String toString() {
+        return "ServerConnectInfo{" +
+                "clusterName='" + clusterName + '\'' +
+                ", tenantName='" + tenantName + '\'' +
+                ", userName='" + userName + '\'' +
+                ", password='******'" +
+                ", databaseName='" + databaseName + '\'' +
+                ", ipPort='" + ipPort + '\'' +
+                ", jdbcUrl='" + jdbcUrl + '\'' +
+                ", publicCloud=" + publicCloud +
+                ", rpcPort=" + rpcPort +
+                '}';
+    }
+ + public String getFullUserName() { + StringBuilder builder = new StringBuilder(); + builder.append(userName); + if (publicCloud || (rpcPort != 0 && EMPTY.equals(clusterName))) { + return builder.toString(); + } + if (!EMPTY.equals(tenantName)) { + builder.append("@").append(tenantName); + } + + if (!EMPTY.equals(clusterName)) { + builder.append("#").append(clusterName); + } + if (EMPTY.equals(this.clusterName) && EMPTY.equals(this.tenantName)) { + return this.userName; + } + return builder.toString(); + } + + public void setRpcPort(int rpcPort) { + this.rpcPort = rpcPort; + } + + public void setSysUser(String sysUser) { + this.sysUser = sysUser; + } + + public void setSysPass(String sysPass) { + this.sysPass = sysPass; + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/AbstractHbaseTask.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/AbstractHbaseTask.java new file mode 100755 index 0000000000..6f43a8ac84 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/AbstractHbaseTask.java @@ -0,0 +1,41 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.task; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; 
+import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.obhbasereader.Constant; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell; +import com.alibaba.datax.plugin.reader.obhbasereader.Key; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public abstract class AbstractHbaseTask { + protected String encoding; + protected String timezone = null; + protected Map hbaseColumnCellMap; + // 常量字段 + protected Map constantMap; + protected ModeType modeType; + + public AbstractHbaseTask() { + } + + public AbstractHbaseTask(Configuration configuration) { + this.timezone = configuration.getString(Key.TIMEZONE, Constant.DEFAULT_TIMEZONE); + this.encoding = configuration.getString(Key.ENCODING, Constant.DEFAULT_ENCODING); + String mode = configuration.getString(Key.MODE, "Normal"); + this.modeType = ModeType.getByTypeName(mode); + this.constantMap = new HashMap<>(); + this.hbaseColumnCellMap = ObHbaseReaderUtil.parseColumn(configuration.getList(Key.COLUMN, Map.class), constantMap, encoding, timezone); + } + + public abstract void prepare() throws Exception; + + public abstract boolean fetchLine(Record record) throws Exception; + + public abstract void close() throws IOException; +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/AbstractScanReader.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/AbstractScanReader.java new file mode 100755 index 0000000000..8d1e8ce364 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/AbstractScanReader.java @@ -0,0 +1,99 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.task; + +import com.alibaba.datax.common.util.Configuration; +import 
com.alibaba.datax.plugin.reader.obhbasereader.Constant; +import com.alibaba.datax.plugin.reader.obhbasereader.HTableManager; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell; +import com.alibaba.datax.plugin.reader.obhbasereader.Key; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; + +import com.alipay.oceanbase.hbase.OHTable; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +public abstract class AbstractScanReader extends AbstractHbaseTask { + private static Logger LOG = LoggerFactory.getLogger(AbstractScanReader.class); + protected OHTable ohtable; + protected Result lastResult = null; + protected Scan scan; + protected ResultScanner resultScanner; + protected int maxVersion; + private int scanCache; + private byte[] startKey = null; + private byte[] endKey = null; + + public AbstractScanReader(Configuration configuration) { + super(configuration); + this.maxVersion = configuration.getInt(Key.MAX_VERSION, 1); + this.scanCache = configuration.getInt(Key.SCAN_CACHE, Constant.DEFAULT_SCAN_CACHE); + this.ohtable = ObHbaseReaderUtil.initOHtable(configuration); + this.startKey = ObHbaseReaderUtil.convertInnerStartRowkey(configuration); + this.endKey = ObHbaseReaderUtil.convertInnerEndRowkey(configuration); + LOG.info("The task set startRowkey=[{}], endRowkey=[{}].", Bytes.toStringBinary(this.startKey), Bytes.toStringBinary(this.endKey)); + } + + @Override + public void prepare() throws Exception { + this.scan = new Scan(); + this.scan.setSmall(false); + this.scan.setCacheBlocks(false); + this.scan.setStartRow(startKey); + this.scan.setStopRow(endKey); + LOG.info("The task set startRowkey=[{}], endRowkey=[{}].", Bytes.toStringBinary(this.startKey), Bytes.toStringBinary(this.endKey)); + 
this.scan.setCaching(this.scanCache); + if (this.maxVersion == -1 || this.maxVersion == Integer.MAX_VALUE) { + this.scan.setMaxVersions(); + } else { + this.scan.setMaxVersions(this.maxVersion); + } + initScanColumns(); + this.resultScanner = this.ohtable.getScanner(this.scan); + } + + @Override + public void close() throws IOException { + if (this.resultScanner != null) { + this.resultScanner.close(); + } + HTableManager.closeHTable(this.ohtable); + } + + protected void initScanColumns() { + boolean isConstant; + boolean isRowkeyColumn; + for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) { + isConstant = cell.isConstant(); + isRowkeyColumn = ObHbaseReaderUtil.isRowkeyColumn(cell.getColumnName()); + if (!isConstant && !isRowkeyColumn) { + LOG.info("columnFamily: " + new String(cell.getCf()) + ", qualifier: " + new String(cell.getQualifier())); + this.scan.addColumn(cell.getCf(), cell.getQualifier()); + } + } + } +
+    /**
+     * Fetch the next row from the current scanner and remember it in lastResult.
+     *
+     * When the scanner fails, it is closed and reopened once, starting from the row key of the last
+     * row that was returned; if the reopened scanner yields that same row again, it is skipped.
+     *
+     * @return the next row, or null when the scanner returned no row
+     * @throws Exception when reopening the scanner or reading from the reopened scanner fails
+     */
+    protected Result getNextHbaseRow() throws Exception {
+        Result result;
+        try {
+            result = resultScanner.next();
+        } catch (Exception e) {
+            LOG.error("failed to get result", e);
+            // release the failed scanner before replacing it; a close failure must not block the retry
+            if (resultScanner != null) {
+                try {
+                    resultScanner.close();
+                } catch (Exception closeError) {
+                    LOG.warn("failed to close broken scanner", closeError);
+                }
+            }
+            if (lastResult != null) {
+                scan.setStartRow(lastResult.getRow());
+            }
+            resultScanner = this.ohtable.getScanner(scan);
+            result = resultScanner.next();
+            // the reopened scan may start with the row already delivered; result is null when
+            // nothing is left, so check it before dereferencing
+            if (result != null && lastResult != null && Bytes.equals(lastResult.getRow(), result.getRow())) {
+                result = resultScanner.next();
+            }
+        }
+        lastResult = result;
+        // may be null
+        return result;
+    }
+} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/SQLNormalModeReader.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/SQLNormalModeReader.java new file mode 100755 index 0000000000..327ac971fe --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/SQLNormalModeReader.java @@ -0,0 +1,257 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.task; + +import static 
com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_READ_HINT; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.reader.obhbasereader.Constant; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; +import com.alibaba.datax.plugin.reader.obhbasereader.Key; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.FetchVersion; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +public class SQLNormalModeReader extends AbstractHbaseTask { + private final static String QUERY_SQL_TEMPLATE = "select %s K, Q, T, V, hex(K) as `hex` from %s %s"; + private static Logger LOG = LoggerFactory.getLogger(SQLNormalModeReader.class); + private final Map columnMap; + private final Map versionMap; + private final FetchVersion fetchVersion; + private Set columnNames; + private boolean noMoreData = false; + private String querySQL = null; + private Connection conn = null; + private PreparedStatement stmt = null; + private ResultSet rs = null; + 
private String jdbcUrl = null; + private String columnFamily = null; + private String username = null; + private String password = null; + private int fetchSize = com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE; + private long readBatchSize = com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_READ_BATCH_SIZE; + private Configuration configuration; + private boolean hasRange = false; + private String[] savepoint = new String[3]; + // only used by unit test + protected boolean reuseConn = false; + + public SQLNormalModeReader(Configuration configuration) { + this.configuration = configuration; + this.hbaseColumnCellMap = ObHbaseReaderUtil.parseColumn(configuration.getList(Key.COLUMN, Map.class)); + if (hbaseColumnCellMap.size() == 0) { + LOG.error("no column cells specified."); + throw new RuntimeException("no column cells specified"); + } + columnFamily = ObHbaseReaderUtil.parseColumnFamily(hbaseColumnCellMap.values()); + this.columnNames = + hbaseColumnCellMap.keySet().stream().map(e -> ObHbaseReaderUtil.isRowkeyColumn(e) ? 
Constant.ROWKEY_FLAG : e.substring((columnFamily + ":").length())).collect(Collectors.toSet()); + + String partInfo = ""; + String partName = configuration.getString(Key.PARTITION_NAME, null); + if (partName != null) { + partInfo = "partition(" + partName + ")"; + } + + String tableName = configuration.getString(Key.TABLE, null); + String hint = configuration.getString(Key.READER_HINT, OB_READ_HINT); + this.hasRange = !StringUtils.isEmpty(configuration.getString(Key.RANGE, null)); + this.querySQL = String.format(QUERY_SQL_TEMPLATE, hint, tableName + "$" + columnFamily, partInfo); + if (hasRange) { + this.querySQL = querySQL + " where (" + configuration.getString(Key.RANGE) + ")"; + } + this.jdbcUrl = configuration.getString(Key.JDBC_URL, null); + this.username = configuration.getString(Key.USERNAME, null); + this.password = configuration.getString(Key.PASSWORD, null); + this.columnMap = Maps.newHashMap(); + this.versionMap = Maps.newHashMap(); + this.fetchVersion = FetchVersion.getByDesc(configuration.getString("version", FetchVersion.LATEST.name())); + this.timezone = configuration.getString(Key.TIMEZONE, "UTC"); + this.encoding = configuration.getString(Key.ENCODING, Constant.DEFAULT_ENCODING); + this.fetchSize = configuration.getInt(Key.FETCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE); + this.readBatchSize = configuration.getLong(Key.READ_BATCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_READ_BATCH_SIZE); + LOG.info("read from jdbcUrl {} with fetchSize {}, readBatchSize {}", jdbcUrl, fetchSize, readBatchSize); + } + + private boolean notFinished(String currentKey) throws SQLException { + boolean updateSuccess = updateResultSet(); + if (updateSuccess) { + String newKey = rs.getString("K"); + return newKey.equals(currentKey); + } else { + noMoreData = true; + Arrays.fill(savepoint, null); + return false; + } + } + + private boolean updateResultSet() throws SQLException { + if (rs != null && rs.next()) 
{ + return true; + } + if (savepoint[0] != null) { + int retryLimit = 10; + int retryCount = 0; + String tempQuery = querySQL + (hasRange ? " and " : " where ") + "(K,Q,T) > (unhex(?),?,?) order by K,Q,T limit " + readBatchSize; + while (retryCount < retryLimit) { + retryCount++; + try { + resetConnection(); + DBUtil.closeDBResources(rs, stmt, null); + stmt = conn.prepareStatement(tempQuery, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + stmt.setFetchSize(fetchSize); + for (int i = 0; i < savepoint.length; i++) { + stmt.setObject(i + 1, savepoint[i]); + } + rs = stmt.executeQuery(); + if (rs.next()) { + LOG.info("execute sql: {}, savepoint:[{}]", tempQuery, Arrays.stream(savepoint).map(e -> "'" + e + "'").collect(Collectors.joining(","))); + return true; + } + // All data in this task are read + break; + } catch (Exception ex) { + LOG.error("failed to query sql, will retry {} times", retryCount, ex); + DBUtil.closeDBResources(rs, stmt, conn); + if (retryCount > retryLimit) { + LOG.error("Sql: [{}] executed failed, savepoint:[{}], reason: {}", tempQuery, Arrays.stream(savepoint).map(e -> "'" + e + "'").collect(Collectors.joining(",")), + ex.getMessage()); + throw new RuntimeException(ex); + } + } + } + } + return false; + } + + @Override + public void prepare() { + int retryLimit = 10; + int retryCount = 0; + while (true) { + retryCount++; + try { + resetConnection(); + String tempQuery = querySQL + " order by K,Q,T limit " + readBatchSize; + stmt = conn.prepareStatement(tempQuery, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + stmt.setFetchSize(fetchSize); + LOG.info("execute sql : {}", tempQuery); + rs = stmt.executeQuery(); + if (!rs.next()) { + noMoreData = true; + } + break; + } catch (Exception e) { + LOG.error("failed to query sql, will retry {} times", retryCount, e); + DBUtil.closeDBResources(rs, stmt, conn); + if (retryCount > retryLimit) { + LOG.error("Sql: [{}] executed failed, reason: {}", querySQL, e.getMessage()); + throw new 
RuntimeException(e); + } + } + } + } + + @Override + public boolean fetchLine(Record record) throws Exception { + try { + if (noMoreData) { + return false; + } + String currentKey = rs.getString("K"); + savepoint[0] = rs.getString("hex"); + columnMap.put(Constant.ROWKEY_FLAG, currentKey.getBytes()); + do { + String columnName = rs.getString("Q"); + savepoint[1] = columnName; + if (!this.columnNames.contains(columnName)) { + continue; + } + Long version = rs.getLong("T"); + savepoint[2] = String.valueOf(version); + byte[] value = rs.getBytes("V"); + Predicate predicate; + switch (this.fetchVersion) { + case OLDEST: + predicate = v -> v.compareTo(versionMap.getOrDefault(columnName, Long.MIN_VALUE)) > 0; + break; + case LATEST: + predicate = v -> v.compareTo(versionMap.getOrDefault(columnName, Long.MAX_VALUE)) < 0; + break; + default: + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "Not support version: " + this.fetchVersion); + } + + if (predicate.test(version)) { + versionMap.put(columnName, version); + columnMap.put(columnName, value); + } + } while (notFinished(currentKey)); + + for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) { + Column column = null; + if (cell.isConstant()) { + // 对常量字段的处理 + column = this.constantMap.get(cell.getColumnName()); + } else { + String columnName = ObHbaseReaderUtil.isRowkeyColumn(cell.getColumnName()) ? Constant.ROWKEY_FLAG : cell.getColumnName().substring((columnFamily + ":").length()); + byte[] value = null; + if (!columnMap.containsKey(columnName)) { + LOG.debug("{} is not contained in the record with K value={}. 
consider this record as null record.", columnName, currentKey); + } else { + value = columnMap.get(columnName); + } + column = ObHbaseReaderUtil.buildColumn(value, cell.getColumnType(), encoding, cell.getDateformat(), timezone); + } + record.addColumn(column); + } + } finally { + this.columnMap.clear(); + this.versionMap.clear(); + } + return true; + } + + @Override + public void close() throws IOException { + DBUtil.closeDBResources(rs, stmt, conn); + } + + private void resetConnection() throws SQLException { + if (reuseConn && conn != null && !conn.isClosed()) { + return; + } + // set ob_query_timeout and ob_trx_timeout to a large time in case timeout + int queryTimeoutSeconds = 60 * 60 * 48; + String setQueryTimeout = "set ob_query_timeout=" + (queryTimeoutSeconds * 1000 * 1000L); + String setTrxTimeout = "set ob_trx_timeout=" + ((queryTimeoutSeconds + 5) * 1000 * 1000L); + List newSessionConfig = Lists.newArrayList(setQueryTimeout, setTrxTimeout); + List sessionConfig = configuration.getList(Key.SESSION, new ArrayList<>(), String.class); + newSessionConfig.addAll(sessionConfig); + configuration.set(Key.SESSION, newSessionConfig); + conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, this.username, this.password); + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/ScanMultiVersionReader.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/ScanMultiVersionReader.java new file mode 100755 index 0000000000..872b5f5f58 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/ScanMultiVersionReader.java @@ -0,0 +1,98 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.task; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.LongColumn; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; 
+import com.alibaba.datax.plugin.reader.obhbasereader.Constant; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.List; + +public class ScanMultiVersionReader extends AbstractScanReader { + private final static Logger LOG = LoggerFactory.getLogger(ScanMultiVersionReader.class); + private static byte[] COLON_BYTE; + private List kvList = new ArrayList<>(); + private int currentReadPosition = 0; + + // rowKey类型 + private ColumnType rowkeyReadoutType = null; + + public ScanMultiVersionReader(Configuration configuration) { + super(configuration); + HbaseColumnCell rowKey = hbaseColumnCellMap.get(Constant.ROWKEY_FLAG); + if (rowKey != null && rowKey.getColumnType() != null) { + this.rowkeyReadoutType = rowKey.getColumnType(); + } else { + this.rowkeyReadoutType = ColumnType.BYTES; + } + try { + ScanMultiVersionReader.COLON_BYTE = ":".getBytes(encoding); + } catch (UnsupportedEncodingException e) { + throw DataXException.asDataXException(HbaseReaderErrorCode.PREPAR_READ_ERROR, "Failed to get binary of column family and column name colon separator inside the system.", e); + } + } + + private void convertKVToLine(KeyValue keyValue, Record record) throws Exception { + byte[] rawRowkey = keyValue.getRow(); + long timestamp = keyValue.getTimestamp(); + byte[] cfAndQualifierName = Bytes.add(keyValue.getFamily(), ScanMultiVersionReader.COLON_BYTE, keyValue.getQualifier()); + + record.addColumn(convertBytesToAssignType(this.rowkeyReadoutType, 
rawRowkey)); + + record.addColumn(convertBytesToAssignType(ColumnType.STRING, cfAndQualifierName)); + + // 直接忽略了用户配置的 timestamp 的类型 + record.addColumn(new LongColumn(timestamp)); + + String cfAndQualifierNameStr = Bytes.toString(cfAndQualifierName); + HbaseColumnCell currentCell = hbaseColumnCellMap.get(cfAndQualifierNameStr); + ColumnType valueReadoutType = currentCell != null ? currentCell.getColumnType() : ColumnType.BYTES; + String dateFormat = currentCell != null ? currentCell.getDateformat() : null; + record.addColumn(convertBytesToAssignType(valueReadoutType, keyValue.getValue(), dateFormat)); + } + + private Column convertBytesToAssignType(ColumnType columnType, byte[] byteArray) throws Exception { + return convertBytesToAssignType(columnType, byteArray, null); + } + + private Column convertBytesToAssignType(ColumnType columnType, byte[] byteArray, String dateFormat) throws Exception { + return ObHbaseReaderUtil.buildColumn(byteArray, columnType, encoding, dateFormat, timezone); + } + + @Override + public boolean fetchLine(Record record) throws Exception { + Result result; + if (this.kvList.size() == this.currentReadPosition) { + result = getNextHbaseRow(); + if (result == null) { + return false; + } + this.kvList = result.list(); + if (this.kvList == null) { + return false; + } + this.currentReadPosition = 0; + } + + try { + KeyValue keyValue = this.kvList.get(this.currentReadPosition); + convertKVToLine(keyValue, record); + } finally { + this.currentReadPosition++; + } + return true; + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/ScanNormalModeReader.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/ScanNormalModeReader.java new file mode 100644 index 0000000000..37d173025e --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/ScanNormalModeReader.java @@ -0,0 +1,65 @@ +package 
com.alibaba.datax.plugin.reader.obhbasereader.task; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.element.StringColumn; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; + +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ScanNormalModeReader extends AbstractScanReader { + private static Logger LOG = LoggerFactory.getLogger(ScanNormalModeReader.class); + + public ScanNormalModeReader(Configuration configuration) { + super(configuration); + this.maxVersion = 1; + } + + @Override + public boolean fetchLine(Record record) throws Exception { + Result result = getNextHbaseRow(); + if (null == result) { + return false; + } + try { + byte[] hbaseColumnValue; + String columnName; + ColumnType columnType; + + byte[] cf; + byte[] qualifier; + + for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) { + columnType = cell.getColumnType(); + Column column = null; + if (cell.isConstant()) { + // 对常量字段的处理 + column = constantMap.get(cell.getColumnName()); + } else { + // 根据列名称获取值 + columnName = cell.getColumnName(); + if (ObHbaseReaderUtil.isRowkeyColumn(columnName)) { + hbaseColumnValue = result.getRow(); + } else { + cf = cell.getCf(); + qualifier = cell.getQualifier(); + hbaseColumnValue = result.getValue(cf, qualifier); + } + column = ObHbaseReaderUtil.buildColumn(hbaseColumnValue, columnType, super.encoding, cell.getDateformat(), timezone); + } + record.addColumn(column); + } + } catch (Exception e) { + // 注意,这里catch的异常,期望是byte数组转换失败的情况。而实际上,string的byte数组,转成整数类型是不容易报错的。但是转成double类型容易报错。 + record.setColumn(0, new 
StringColumn(Bytes.toStringBinary(result.getRow()))); + throw e; + } + return true; + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/HbaseSplitUtil.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/HbaseSplitUtil.java new file mode 100755 index 0000000000..2baa227030 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/HbaseSplitUtil.java @@ -0,0 +1,154 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.util; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; +import com.alibaba.datax.plugin.reader.obhbasereader.Key; + +import com.google.common.collect.Lists; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public final class HbaseSplitUtil { + private final static Logger LOG = LoggerFactory.getLogger(HbaseSplitUtil.class); + + public static List split(Configuration configuration) { + final List ranges = configuration.getListConfiguration(Key.RANGE); + if (CollectionUtils.isEmpty(ranges)) { + return Lists.newArrayList(configuration); + } + + //TODO(yuez) 后续hbase api具备查询region的功能后,这里需要添加查询table region的逻辑,并且取table region和用户指定的range的交集 + List sliceConfs = new ArrayList<>(ranges.size()); + for (Configuration range : ranges) { + byte[] startRowKey = convertUserRowkey(range, true); + byte[] endRowKey = convertUserRowkey(range, false); + if (startRowKey.length != 0 && endRowKey.length != 0 && Bytes.compareTo(startRowKey, endRowKey) > 0) { + throw 
DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "The startRowkey in obhbasereader must not be greater than the endRowkey."); + } + Configuration sliceConf = configuration.clone(); + sliceConf.remove(Key.RANGE); + String startKeyStr = Bytes.toStringBinary(startRowKey); + String endRowKeyStr = Bytes.toStringBinary(endRowKey); + sliceConf.set(Key.START_ROWKEY, startKeyStr); + sliceConf.set(Key.END_ROWKEY, endRowKeyStr); + sliceConfs.add(sliceConf); + } + return sliceConfs; + } + + public static byte[] convertUserRowkey(Configuration configuration, boolean isStart) { + String keyName = isStart ? Key.START_ROWKEY : Key.END_ROWKEY; + String startRowkey = configuration.getString(keyName); + if (StringUtils.isBlank(startRowkey)) { + return HConstants.EMPTY_BYTE_ARRAY; + } else { + boolean isBinaryRowkey = configuration.getBool(Key.IS_BINARY_ROWKEY, false); + return stringToBytes(startRowkey, isBinaryRowkey); + } + } + + private static byte[] stringToBytes(String rowkey, boolean isBinaryRowkey) { + if (isBinaryRowkey) { + return Bytes.toBytesBinary(rowkey); + } else { + return Bytes.toBytes(rowkey); + } + } + + /** + * 后续hbase api具备查询region的功能后才用得到此方法 + * + * @param config + * @param startRowkeyByte + * @param endRowkeyByte + * @param regionRanges + * @return + */ + private static List doSplit(Configuration config, byte[] startRowkeyByte, byte[] endRowkeyByte, Pair regionRanges) { + + List configurations = new ArrayList(); + + for (int i = 0; i < regionRanges.getFirst().length; i++) { + + byte[] regionStartKey = regionRanges.getFirst()[i]; + byte[] regionEndKey = regionRanges.getSecond()[i]; + + // 当前的region为最后一个region + // 如果最后一个region的start Key大于用户指定的userEndKey,则最后一个region,应该不包含在内 + // 注意如果用户指定userEndKey为"",则此判断应该不成立。userEndKey为""表示取得最大的region + if (Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0 && (endRowkeyByte.length != 0 && (Bytes.compareTo(regionStartKey, endRowkeyByte) > 0))) { + continue; + } + + // 如果当前的region不是最后一个region, + // 
用户配置的userStartKey大于等于region的endkey,则这个region不应该含在内 + if ((Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) != 0) && (Bytes.compareTo(startRowkeyByte, regionEndKey) >= 0)) { + continue; + } + + // 如果用户配置的userEndKey小于等于 region的startkey,则这个region不应该含在内 + // 注意如果用户指定的userEndKey为"",则次判断应该不成立。userEndKey为""表示取得最大的region + if (endRowkeyByte.length != 0 && (Bytes.compareTo(endRowkeyByte, regionStartKey) <= 0)) { + continue; + } + + String thisStartKey = getStartKey(startRowkeyByte, regionStartKey); + String thisEndKey = getEndKey(endRowkeyByte, regionEndKey); + Configuration p = config.clone(); + p.set(Key.START_ROWKEY, thisStartKey); + p.set(Key.END_ROWKEY, thisEndKey); + LOG.debug("startRowkey:[{}], endRowkey:[{}] .", thisStartKey, thisEndKey); + configurations.add(p); + } + + return configurations; + } + + private static String getEndKey(byte[] endRowkeyByte, byte[] regionEndKey) { + if (endRowkeyByte == null) { // 由于之前处理过,所以传入的userStartKey不可能为null + throw new IllegalArgumentException("userEndKey should not be null!"); + } + + byte[] tempEndRowkeyByte; + + if (endRowkeyByte.length == 0) { + tempEndRowkeyByte = regionEndKey; + } else if (Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0) { + // 为最后一个region + tempEndRowkeyByte = endRowkeyByte; + } else { + if (Bytes.compareTo(endRowkeyByte, regionEndKey) > 0) { + tempEndRowkeyByte = regionEndKey; + } else { + tempEndRowkeyByte = endRowkeyByte; + } + } + + return Bytes.toStringBinary(tempEndRowkeyByte); + } + + private static String getStartKey(byte[] startRowkeyByte, byte[] regionStarKey) { + if (startRowkeyByte == null) { // 由于之前处理过,所以传入的userStartKey不可能为null + throw new IllegalArgumentException("userStartKey should not be null!"); + } + + byte[] tempStartRowkeyByte; + + if (Bytes.compareTo(startRowkeyByte, regionStarKey) < 0) { + tempStartRowkeyByte = regionStarKey; + } else { + tempStartRowkeyByte = startRowkeyByte; + } + + return Bytes.toStringBinary(tempStartRowkeyByte); + } +} diff --git 
a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_en_US.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_en_US.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_ja_JP.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_CN.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_HK.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_TW.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/ObHbaseReaderUtil.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/ObHbaseReaderUtil.java new file mode 100755 index 0000000000..4177f1f5d4 --- /dev/null +++ 
b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/ObHbaseReaderUtil.java @@ -0,0 +1,293 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.util; + +import static com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType.MultiVersionFixedColumn; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_DATABASE; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_FULL_USER_NAME; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_ADDR; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_MODE; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_PORT; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PARAM_URL; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PASSWORD; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_PASSWORD; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_USER_NAME; + +import com.alibaba.datax.common.element.BoolColumn; +import com.alibaba.datax.common.element.BytesColumn; +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.DateColumn; +import com.alibaba.datax.common.element.DoubleColumn; +import com.alibaba.datax.common.element.LongColumn; +import com.alibaba.datax.common.element.StringColumn; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.obhbasereader.Constant; +import com.alibaba.datax.plugin.reader.obhbasereader.HTableManager; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; +import com.alibaba.datax.plugin.reader.obhbasereader.Key; +import 
com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.TypeReference; + +import com.alipay.oceanbase.hbase.OHTable; +import org.apache.commons.collections.MapUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; +import org.apache.commons.lang3.time.DateUtils; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.charset.Charset; +import java.text.SimpleDateFormat; +import java.util.Collection; +import java.util.Date; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public final class ObHbaseReaderUtil { + private static Logger LOG = LoggerFactory.getLogger(ObHbaseReaderUtil.class); + + public static void doPretreatment(Configuration originalConfig) { + String mode = ObHbaseReaderUtil.dealMode(originalConfig); + originalConfig.set(Key.MODE, mode); + + String encoding = originalConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING); + if (!Charset.isSupported(encoding)) { + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("The encoding you configured is not supported by hbasereader:[%s]", encoding)); + } + originalConfig.set(Key.ENCODING, encoding); + + // 此处增强一个检查:isBinaryRowkey 配置不能出现在与 hbaseConfig 等配置平级地位 + Boolean isBinaryRowkey = originalConfig.getBool(Key.IS_BINARY_ROWKEY); + if (isBinaryRowkey != null) { + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("%s cannot be configured here. 
It should be configured in range.", Key.IS_BINARY_ROWKEY)); + } + } + + /** + * 对模式以及与模式进行配对的配置进行检查 + */ + private static String dealMode(Configuration originalConfig) { + String mode = originalConfig.getString(Key.MODE); + ModeType modeType = ModeType.getByTypeName(mode); + List column = originalConfig.getList(Key.COLUMN, Map.class); + if (column == null || column.isEmpty()) { + throw DataXException.asDataXException(HbaseReaderErrorCode.REQUIRED_VALUE, + "You have configured the normal mode to read the data in HBase, so you must configure the column in the form of:column:[{\"name\": \"cf0:column0\",\"type\": \"string\"}," + + "{\"name\": \"cf1:column1\",\"type\": \"long\"}]"); + } + + // 通过 parse 进行 column 格式的进一步检查 + ObHbaseReaderUtil.parseColumn(column); + if (MultiVersionFixedColumn.equals(modeType)) { + Integer maxVersion = originalConfig.getInt(Key.MAX_VERSION); + Validate.notNull(maxVersion, String.format("You have configured thw mode %s to read the data in HBase, so you must configure: maxVersion", mode)); + + boolean isMaxVersionValid = maxVersion == -1 || maxVersion > 1; + Validate.isTrue(isMaxVersionValid, String.format( + "You have configured the mode %s to read the data in HBase, but the configured maxVersion value is wrong. maxVersion specifies that: - 1 is to read all versions, and cannot be " + + "configured as 0 or 1 (because 0 or 1, we think the user wants to read the data in normal mode instead of reading in mode %s, the difference is big). 
If it is greater " + + "than" + + " 1, it means to read the latest corresponding number of versions.", + mode, mode)); + } + return mode; + } + + /** + * 注意:convertUserStartRowkey 和 convertInnerStartRowkey,前者会受到 isBinaryRowkey 的影响,只用于第一次对用户配置的 String 类型的 rowkey 转为二进制时使用。而后者约定:切分时得到的二进制的 rowkey 回填到配置中时采用 + */ + public static byte[] convertInnerStartRowkey(Configuration configuration) { + String startRowkey = configuration.getString(Key.START_ROWKEY); + if (StringUtils.isBlank(startRowkey)) { + return HConstants.EMPTY_BYTE_ARRAY; + } + + return Bytes.toBytesBinary(startRowkey); + } + + public static byte[] convertInnerEndRowkey(Configuration configuration) { + String endRowkey = configuration.getString(Key.END_ROWKEY); + if (StringUtils.isBlank(endRowkey)) { + return HConstants.EMPTY_BYTE_ARRAY; + } + + return Bytes.toBytesBinary(endRowkey); + } + + private static void setObHBaseConfig(com.alibaba.datax.common.util.Configuration confFile, org.apache.hadoop.conf.Configuration oHbaseConf) { + + boolean odpMode = confFile.getBool(Key.USE_ODP_MODE); + String username = confFile.getString(Key.USERNAME); + String password = confFile.getString(Key.PASSWORD); + String dbName = confFile.getString(Key.DB_NAME); + +// oHbaseConf.set(RS_LIST_ACQUIRE_CONNECT_TIMEOUT.getKey(), "500"); +// oHbaseConf.set(RS_LIST_ACQUIRE_READ_TIMEOUT.getKey(), "5000"); + oHbaseConf.set(HBASE_OCEANBASE_FULL_USER_NAME, username); + oHbaseConf.set(HBASE_OCEANBASE_PASSWORD, password); +// oHbaseConf.set(HBASE_, META_SCANNER_CACHING); + if (odpMode) { + oHbaseConf.setBoolean(HBASE_OCEANBASE_ODP_MODE, true); + oHbaseConf.set(HBASE_OCEANBASE_DATABASE, dbName); + oHbaseConf.set(HBASE_OCEANBASE_ODP_ADDR, confFile.getString(Key.ODP_HOST)); + oHbaseConf.setInt(HBASE_OCEANBASE_ODP_PORT, confFile.getInt(Key.ODP_PORT)); + } else { + String clusterName = null; + final Pattern pattern = Pattern.compile("([\\w]+)@([\\w]+)#([\\w]+)"); + Matcher matcher = pattern.matcher(username); + if (matcher.find()) { + 
clusterName = matcher.group(3); + } else { + throw new RuntimeException("user name is not in the correct format: user@tenant#cluster"); + } + String configUrl = confFile.getString(Key.CONFIG_URL); + if (!configUrl.contains("ObRegion")) { + if (configUrl.contains("?")) { + configUrl += "&ObRegion=" + clusterName; + } else { + configUrl += "?ObRegion=" + clusterName; + } + } + + if (!configUrl.contains("database")) { + configUrl += "&database=" + dbName; + } + oHbaseConf.set(HBASE_OCEANBASE_PARAM_URL, configUrl); + oHbaseConf.set(HBASE_OCEANBASE_SYS_USER_NAME, confFile.getString(Key.OB_SYS_USERNAME)); + oHbaseConf.set(HBASE_OCEANBASE_SYS_PASSWORD, confFile.getString(Key.OB_SYS_PASSWORD)); + } + + String hbaseConf = confFile.getString(Key.HBASE_CONFIG); + Map map = JSON.parseObject(hbaseConf, new TypeReference>() { + }); + if (MapUtils.isNotEmpty(map)) { + for (Map.Entry entry : map.entrySet()) { + oHbaseConf.set(entry.getKey(), entry.getValue()); + } + } + } + + /** + * 每次都获取一个新的HTable 注意:HTable 本身是线程不安全的 + */ + public static OHTable initOHtable(com.alibaba.datax.common.util.Configuration configuration) { + String tableName = configuration.getString(Key.TABLE); + try { + org.apache.hadoop.conf.Configuration oHbaseConf = new org.apache.hadoop.conf.Configuration(); + setObHBaseConfig(configuration, oHbaseConf); + return HTableManager.createHTable(oHbaseConf, tableName); + } catch (Exception e) { + LOG.error("init ohTable error, reason: {}", e.getMessage(), e); + throw DataXException.asDataXException(HbaseReaderErrorCode.INIT_TABLE_ERROR, e); + } + } + + public static boolean isRowkeyColumn(String columnName) { + return Constant.ROWKEY_FLAG.equalsIgnoreCase(columnName); + } + + public static String parseColumnFamily(Collection hbaseColumnCells) { + for (HbaseColumnCell columnCell : hbaseColumnCells) { + if (ObHbaseReaderUtil.isRowkeyColumn(columnCell.getColumnName())) { + continue; + } + if (columnCell.getColumnName() == null || 
columnCell.getColumnName().split(":").length != 2) { + LOG.error("column cell format is unknown: {}", columnCell); + throw new RuntimeException("Column cell format is unknown: " + columnCell); + } + return columnCell.getColumnName().split(":")[0]; + } + throw new RuntimeException("parse column family failed."); + } + + /** + * 用于解析列配置 + */ + public static LinkedHashMap parseColumn(List column) { + return parseColumn(column, null, Constant.DEFAULT_ENCODING, Constant.DEFAULT_TIMEZONE); + } + + public static LinkedHashMap parseColumn(List column, Map constantMap, String encoding, String timezone) { + LinkedHashMap hbaseColumnCells = new LinkedHashMap<>(column.size()); + boolean cacheConstantValue = constantMap != null; + HbaseColumnCell oneColumnCell; + try { + for (Map aColumn : column) { + ColumnType type = ColumnType.getByTypeName(aColumn.get("type")); + boolean isRowKey = isRowkeyColumn(aColumn.get("name")); + String columnName = isRowKey ? Constant.ROWKEY_FLAG : aColumn.get("name"); + + String columnValue = aColumn.get("value"); + String dateFormat = aColumn.getOrDefault("format", Constant.DEFAULT_DATE_FORMAT); + Validate.isTrue(StringUtils.isNotBlank(columnName) || StringUtils.isNotBlank(columnValue), + "It is either a combination of type + name + format or a combination of type + value + format. Your configuration is neither of the two. 
Please check and modify it."); + if (type == ColumnType.DATE) { + if (StringUtils.isBlank(dateFormat)) { + LOG.warn("date format for {} is empty, use default date format 'yyyy-MM-dd HH:mm:ss' instead.", columnName); + } + oneColumnCell = new HbaseColumnCell.Builder(type).columnName(columnName).columnValue(columnValue).dateformat(dateFormat).build(); + } else { + oneColumnCell = new HbaseColumnCell.Builder(type).columnName(columnName).columnValue(columnValue).build(); + } + hbaseColumnCells.put(columnName, oneColumnCell); + if (cacheConstantValue && oneColumnCell.isConstant()) { + constantMap.put(columnName, buildColumn(columnValue, type, encoding, dateFormat, timezone)); + } + } + return hbaseColumnCells; + } catch (Exception e) { + LOG.error("parse column failed, reason:{}", e.getMessage(), e); + throw DataXException.asDataXException(HbaseReaderErrorCode.PARSE_COLUMN_ERROR, e.getMessage()); + } + } + + public static Column buildColumn(String columnValue, ColumnType columnType, String encoding, String dateformat, String timezone) throws Exception { + return buildColumn(columnValue.getBytes(encoding), columnType, encoding, dateformat, timezone); + } + + public static Column buildColumn(byte[] columnValue, ColumnType columnType, String encoding, String dateformat, String timezone) throws Exception { + switch (columnType) { + case BOOLEAN: + return new BoolColumn(columnValue == null ? null : Bytes.toBoolean(columnValue)); + case SHORT: + return new LongColumn(columnValue == null ? null : String.valueOf(Bytes.toShort(columnValue))); + case INT: + return new LongColumn(columnValue == null ? null : Bytes.toInt(columnValue)); + case LONG: + return new LongColumn(columnValue == null ? null : Bytes.toLong(columnValue)); + case BYTES: + return new BytesColumn(columnValue == null ? null : columnValue); + case FLOAT: + return new DoubleColumn(columnValue == null ? null : Bytes.toFloat(columnValue)); + case DOUBLE: + return new DoubleColumn(columnValue == null ? 
null : Bytes.toDouble(columnValue)); + case STRING: + return new StringColumn(columnValue == null ? null : new String(columnValue, encoding)); + case BINARY_STRING: + return new StringColumn(columnValue == null ? null : Bytes.toStringBinary(columnValue)); + case DATE: + String dateValue = Bytes.toStringBinary(columnValue); + String timestamp = null; + try { + long milliSec = Long.parseLong(dateValue); + Date date = new java.util.Date(milliSec); + SimpleDateFormat sdf = new java.text.SimpleDateFormat(dateformat); + sdf.setTimeZone(java.util.TimeZone.getTimeZone(timezone)); + timestamp = sdf.format(date); + } catch (Exception e) { + // this is already formatted timestamp + timestamp = dateValue; + } + return columnValue == null ? null : new DateColumn(DateUtils.parseDate(timestamp, dateformat)); + default: + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "obHbasereader 不支持您配置的列类型:" + columnType); + } + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/SqlReaderSplitUtil.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/SqlReaderSplitUtil.java new file mode 100644 index 0000000000..bd589500a4 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/SqlReaderSplitUtil.java @@ -0,0 +1,190 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.util; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.reader.Constant; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.SplitedSlice; +import com.alibaba.datax.plugin.reader.obhbasereader.Key; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ExecutorTemplate; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils; +import 
com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartInfo; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartitionSplitUtil; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.commons.collections.CollectionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SqlReaderSplitUtil { + public static final String SAMPLE_SQL_TEMPLATE = "SELECT `hex` FROM (SELECT `hex`,K , bucket, ROW_NUMBER() OVER (PARTITION BY bucket ORDER BY K) rn FROM(SELECT %s `hex`, K ,NTILE(%s) OVER " + + "(ORDER BY K ) bucket FROM (SELECT hex(K) as `hex`, K FROM %s SAMPLE BLOCK(%s)) a) b) c WHERE rn = 1 GROUP BY K ORDER BY K"; + public static final String MIDDLE_RANGE_TEMPLATE = "((K) > (unhex('%s'))) AND ((K) <= (unhex('%s')))"; + public static final String MIN_MAX_RANGE_TEMPLATE = "((K)<= (unhex('%s'))) or ((K) > (unhex('%s')))"; + private static final Logger LOG = LoggerFactory.getLogger(SqlReaderSplitUtil.class); + + public static List splitSingleTable(Configuration configuration, String tableName, String columnFamily, int eachTableShouldSplittedNumber, boolean readByPartition) { + List partitionList = Lists.newArrayList(); + String tableNameWithCf = tableName + "$" + columnFamily; + PartInfo partInfo = PartitionSplitUtil.getObMySQLPartInfoBySQL(configuration, tableNameWithCf); + if (partInfo.isPartitionTable()) { + partitionList.addAll(partInfo.getPartList()); + } + // read all partitions and split job only by partition + if (readByPartition) { + LOG.info("table: [{}] will read only by partition", tableNameWithCf); + return splitSingleTableByPartition(configuration, partitionList); + } + + if 
(eachTableShouldSplittedNumber <= 1) { + LOG.info("total enable splitted number of table: [{}] is {}, no need to split", tableNameWithCf, eachTableShouldSplittedNumber); + return Lists.newArrayList(configuration); + } + + // If user specified some partitions to be read, + List userSetPartitions = configuration.getList(Key.PARTITION_NAME, String.class); + if (CollectionUtils.isNotEmpty(userSetPartitions)) { + Set partSet = new HashSet<>(partitionList); + // If partition name does not exist in the table, throw exception directly. Case is sensitive. + userSetPartitions.forEach(e -> Preconditions.checkArgument(partSet.contains(e), "partition %s does not exist in table: %s", e, tableNameWithCf)); + partitionList.clear(); + partitionList.addAll(userSetPartitions); + } + + if (partitionList.isEmpty()) { + LOG.info("table: [{}] is not partitioned, just split table by rowKey.", tableNameWithCf); + List splitConfs = splitSingleTableByRowKey(configuration, tableNameWithCf, eachTableShouldSplittedNumber); + LOG.info("total split count of non-partitioned table :[{}] is {}", tableNameWithCf, splitConfs.size()); + return splitConfs; + } else { + ExecutorTemplate> template = new ExecutorTemplate<>("split-rows-by-rowkey-" + tableNameWithCf + "-", eachTableShouldSplittedNumber); + int splitNumPerPartition = (int) Math.ceil(1.0d * eachTableShouldSplittedNumber / partitionList.size()); + LOG.info("table: [{}] is partitioned, split table by rowKey in parallel. 
splitNumPerPartition is {}", tableNameWithCf, splitNumPerPartition); + for (String partName : partitionList) { + try { + template.submit(() -> { + Configuration tempConf = configuration.clone(); + tempConf.set(Key.PARTITION_NAME, partName); + return splitSingleTableByRowKey(tempConf, tableNameWithCf, splitNumPerPartition); + }); + } catch (Throwable th) { + LOG.error("submit split task of table: [{}-{}] failed, reason: {}", tableNameWithCf, partName, th.getMessage(), th); + } + } + List splitConfs = template.waitForResult().stream().flatMap(Collection::stream).collect(Collectors.toList()); + LOG.info("total split count of partitioned table :[{}] is {}", tableNameWithCf, splitConfs.size()); + return splitConfs; + } + } + + private static List splitSingleTableByPartition(Configuration configuration, List partList) { + if (partList == null || partList.isEmpty()) { + return Lists.newArrayList(configuration); + } + List confList = new ArrayList<>(); + for (String partName : partList) { + LOG.info("read sub task: reading from partition " + partName); + Configuration conf = configuration.clone(); + conf.set(Key.PARTITION_NAME, partName); + confList.add(conf); + } + return confList; + } + + /** + * @param configuration + * @param tableNameWithCf + * @param eachTableShouldSplittedNumber + * @return + */ + public static List splitSingleTableByRowKey(Configuration configuration, String tableNameWithCf, int eachTableShouldSplittedNumber) { + String jdbcURL = configuration.getString(Key.JDBC_URL); + String username = configuration.getString(Key.USERNAME); + String password = configuration.getString(Key.PASSWORD); + String hint = configuration.getString(Key.READER_HINT, com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_READ_HINT); + String partInfo = ""; + String partName = configuration.getString(Key.PARTITION_NAME, null); + if (partName != null) { + partInfo = " partition(" + partName + ")"; + } + tableNameWithCf += partInfo; + int fetchSize = 
configuration.getInt(Constant.FETCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE); + Double percentage = configuration.getDouble(Key.SAMPLE_PERCENTAGE, 0.1); + List slices = new ArrayList<>(); + List pluginParams = new ArrayList<>(); + // set ob_query_timeout and ob_trx_timeout to a large time in case timeout + int queryTimeoutSeconds = 60 * 60 * 48; + try (Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcURL, username, password)) { + String setQueryTimeout = "set ob_query_timeout=" + (queryTimeoutSeconds * 1000 * 1000L); + String setTrxTimeout = "set ob_trx_timeout=" + ((queryTimeoutSeconds + 5) * 1000 * 1000L); + try (Statement stmt = conn.createStatement()) { + stmt.execute(setQueryTimeout); + stmt.execute(setTrxTimeout); + } catch (Exception e) { + LOG.warn("set ob_query_timeout and set ob_trx_timeout failed. reason: {}", e.getMessage(), e); + } + slices = getSplitSqlBySample(conn, tableNameWithCf, fetchSize, percentage, eachTableShouldSplittedNumber, hint); + } catch (Throwable e) { + LOG.warn("query rowkey range failed of table: {}. reason: {}. 
the table will not be splitted.", tableNameWithCf, e.getMessage(), e); + } + + if (!slices.isEmpty()) { + for (SplitedSlice slice : slices) { + Configuration tempConfig = configuration.clone(); + tempConfig.set(Key.RANGE, slice.getRange()); + pluginParams.add(tempConfig); + } + } else { + Configuration tempConfig = configuration.clone(); + pluginParams.add(tempConfig); + } + return pluginParams; + } + + /** + * 按照采样方法切分,不能直接顺序切分否则可能导致原本属于一行的数据被切分为两行 + * + * @param conn + * @param tableName + * @param fetchSize + * @param percentage + * @param adviceNum + * @param hint + * @return List + * @throws SQLException + */ + private static List getSplitSqlBySample(Connection conn, String tableName, int fetchSize, double percentage, int adviceNum, String hint) throws SQLException { + String splitSql = String.format(SAMPLE_SQL_TEMPLATE, hint, adviceNum, tableName, percentage); + LOG.info("split pk [sql={}] is running... ", splitSql); + List boundList = new ArrayList<>(); + try (ResultSet rs = DBUtil.query(conn, splitSql, fetchSize)) { + while (rs.next()) { + boundList.add(rs.getString(1)); + } + } + if (boundList.size() == 0) { + return new ArrayList<>(); + } + List rangeSql = new ArrayList<>(); + for (int i = 0; i < boundList.size() - 1; i++) { + String range = String.format(MIDDLE_RANGE_TEMPLATE, boundList.get(i), boundList.get(i + 1)); + SplitedSlice slice = new SplitedSlice(boundList.get(i), boundList.get(i + 1), range); + rangeSql.add(slice); + } + String range = String.format(MIN_MAX_RANGE_TEMPLATE, boundList.get(0), boundList.get(boundList.size() - 1)); + SplitedSlice slice = new SplitedSlice(null, null, range); + rangeSql.add(slice); + return rangeSql; + } +} diff --git a/obhbasereader/src/main/resources/plugin.json b/obhbasereader/src/main/resources/plugin.json new file mode 100755 index 0000000000..36d52d69ad --- /dev/null +++ b/obhbasereader/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "obhbasereader", + "class": 
"com.alibaba.datax.plugin.reader.obhbasereader.ObHbaseReader", + "description": "useScene: prod. mechanism: Scan to read data.", + "developer": "alibaba" +} diff --git a/obhbasereader/src/main/resources/plugin_job_template.json b/obhbasereader/src/main/resources/plugin_job_template.json new file mode 100644 index 0000000000..e8adb94510 --- /dev/null +++ b/obhbasereader/src/main/resources/plugin_job_template.json @@ -0,0 +1,15 @@ +{ + "name": "obhbasereader", + "parameter": { + "hbaseConfig": {}, + "table": "", + "encoding": "", + "mode": "", + "column": [], + "range": { + "startRowkey": "", + "endRowkey": "" + }, + "isBinaryRowkey": true + } +} \ No newline at end of file diff --git a/obhbasewriter/doc/obhbasewriter.md b/obhbasewriter/doc/obhbasewriter.md new file mode 100644 index 0000000000..8c2a6749cb --- /dev/null +++ b/obhbasewriter/doc/obhbasewriter.md @@ -0,0 +1,209 @@ +OceanBase的table api为应用提供了ObHBase的访问接口,因此,OceanBase table api的reader与HBase writer的结构和配置方法类似。 +1 快速介绍 +obhbaseWriter 插件实现了从向ObHbase中写取数据。在底层实现上,obhbaseWriter 通过 HBase 的 Java 客户端连接远程 HBase 服务,并通过 put 方式写入obHbase。 +1.1支持功能 +1、目前obhbasewriter支持的obHbase版本为OceanBase3.x以及4.x版本。 +2、目前obhbasewriter支持源端多个字段拼接作为ObHbase 表的 rowkey,具体配置参考:rowkeyColumn配置; +3、写入obhbase的时间戳(版本)支持:用当前时间作为版本,指定源端列作为版本,指定一个时间 三种方式作为版本; +#### 脚本配置 +```json +{ + "job": { + "setting": { + "speed": { + "channel": 5 + } + }, + "content": [ + { + "reader": { + "name": "txtfilereader", + "parameter": { + "path": "/normal.txt", + "charset": "UTF-8", + "column": [ + { + "index": 0, + "type": "String" + }, + { + "index": 1, + "type": "string" + }, + { + "index": 2, + "type": "string" + }, + { + "index": 3, + "type": "string" + }, + { + "index": 4, + "type": "string" + }, + { + "index": 5, + "type": "string" + }, + { + "index": 6, + "type": "string" + } + + ], + "fieldDelimiter": "," + } + }, + "writer": { + "name": "obhbasewriter", + "parameter": { + "username": "username", + "password": "password", + "writerThreadCount": "20", + 
"writeBufferHighMark": "2147483647", + "rpcExecuteTimeout": "30000", + "useOdpMode": "false", + "obSysUser": "root", + "obSysPassword": "", + "column": [ + { + "index": 0, + "name": "family1:c1", + "type": "string" + }, + { + "index": 1, + "name": "family1:c2", + "type": "string" + }, + { + "index": 2, + "name": "family1:c3", + "type": "string" + }, + { + "index": 3, + "name": "family1:c4", + "type": "string" + }, + { + "index": 4, + "name": "family1:c5", + "type": "string" + }, + { + "index": 5, + "name": "family1:c6", + "type": "string" + }, + { + "index": 6, + "name": "family1:c7", + "type": "string" + } + ], + "mode": "normal", + "rowkeyColumn": [ + { + "index": 0, + "type": "string" + }, + { + "index": 3, + "type": "string" + }, + { + "index": 2, + "type": "string" + }, + { + "index": 1, + "type": "string" + } + ], + "table": "htable3", + "batchSize": "200", + "dbName": "database", + "jdbcUrl": "jdbc:mysql://ip:port/database?" + } + } + } + ] + } +} +``` +##### 参数解释 + +- **connection** + +公有云和私有云需要配置的信息不同,具体如下: +公有云: + +- 数据库用户名;(在外层统一配置) +- 用户密码;(在外层统一配置) +- proxy的jdbc地址 +- 数据库名称; + +私有云: + +- 数据库用户名;(在外层统一配置) +- 用户密码;(在外层统一配置) +- proxy的jdbc地址 +- obSysUser:sys租户的用户名; +- obSysPass:sys租户的密码; +- configUrl; + - 描述:可以通过show parameters like 'obConfigUrl' 获得。 + - 必须:是 + - 默认值:无 +- **jdbcUrl** + - 描述:连接ob使用的jdbc url,支持如下两种格式: + - jdbc:mysql://obproxyIp:obproxyPort/db + - 此格式下username需要写成三段式格式 + - ||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/db + - 此格式下username仅填写用户名本身,无需三段式写法 + - 必选:是 + - 默认值:无 +- **table** + - 描述:所选取的需要同步的表。无需增加列族信息。 + - 必选:是 + - 默认值:无 +- **username** + - 描述:访问OceanBase的用户名 + - 必选:是 + - 默认值:无 +- **useOdpMode** + - 描述:是否通过proxy连接。无法提供sys租户帐密时需要设置为true + - 必须:否 + - 默认值:false +- **column** + - 描述:要写入的hbase字段。index:指定该列对应reader端column的索引,从0开始;name:指定hbase表中的列,必须为 列族:列名 的格式;type:指定写入数据类型,用于转换HBase byte[]。配置格式如下: +```json +"column": [ { "index":1, "name": "cf1:q1", "type": "string" }, { "index":2, "name": "cf1:q2", "type": 
"string" } ] +``` + +- 必选:是 + - 默认值:无 +- **rowkeyColumn** + - 描述:要写入的ObHbase的rowkey列。index:指定该列对应reader端column的索引,从0开始,若为常量index为-1;type:指定写入数据类型,用于转换HBase byte[];value:配置常量,常作为多个字段的拼接符。obhbasewriter会将rowkeyColumn中所有列按照配置顺序进行拼接作为写入hbase的rowkey,不能全为常量。配置格式如下: +```json +"rowkeyColumn": [ { "index":0, "type":"string" }, { "index":-1, "type":"string", "value":"_" } ] +``` + +- 必选:是 + - 默认值:无 +- **versionColumn** + - 描述:指定写入obhbase的时间戳。支持:当前时间、指定时间列,指定时间,三者选一。若不配置表示用当前时间。index:指定对应reader端column的索引,从0开始,需保证能转换为long,若是Date类型,会尝试用yyyy-MM-dd HH:mm:ss和yyyy-MM-dd HH:mm:ss SSS去解析;若为指定时间index为-1;value:指定时间的值,long值。配置格式如下: +```json +"versionColumn":{ "index":1 } +``` +或者 +```json +"versionColumn":{ "index":-1, "value":123456789 } +``` + +- 必选:否 +- 默认值:无 + + + diff --git a/obhbasewriter/pom.xml b/obhbasewriter/pom.xml new file mode 100644 index 0000000000..af06aa26f9 --- /dev/null +++ b/obhbasewriter/pom.xml @@ -0,0 +1,185 @@ + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + obhbasewriter + + com.alibaba.datax + 0.0.1-SNAPSHOT + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + com.alibaba.datax + plugin-rdbms-util + ${datax-project-version} + + + guava + com.google.guava + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + com.alibaba.datax + simulator + ${datax-project-version} + test + + + org.springframework + spring-test + 4.0.4.RELEASE + test + + + com.taobao.tddl + tddl-client + + + com.google.guava + guava + + + com.taobao.diamond + diamond-client + + + + + + com.google.guava + guava + 33.1.0-jre + + + + com.alipay.oceanbase + oceanbase-connector-java + 3.2.0 + + + + log4j + log4j + 1.2.16 + + + com.alibaba.toolkit.common + toolkit-common-logging + 1.10 + + + org.json + json + 20160810 + + + junit + junit + 4.11 + test + + + org.powermock + powermock-module-junit4 + 1.4.10 + test + + + org.powermock + powermock-api-mockito + 1.4.10 + test + + + org.mockito + mockito-core + 
1.8.5 + test + + + + com.oceanbase + obkv-hbase-client + 0.1.4.2 + + + guava + com.google.guava + + + + + + org.apache.hadoop + hadoop-core + 1.0.3 + + + + + + + + src/main/java + + **/*.properties + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/obhbasewriter/src/main/assembly/package.xml b/obhbasewriter/src/main/assembly/package.xml new file mode 100644 index 0000000000..fd05bea3ca --- /dev/null +++ b/obhbasewriter/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/obhbasewriter + + + target/ + + obhbasewriter-0.0.1-SNAPSHOT.jar + + plugin/writer/obhbasewriter + + + + + + false + plugin/writer/obhbasewriter/libs + runtime + + + diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ColumnType.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ColumnType.java new file mode 100755 index 0000000000..e451071e80 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ColumnType.java @@ -0,0 +1,50 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.MessageSource; +import java.util.Arrays; + + + +import org.apache.commons.lang.StringUtils; + +/** + * 只对 normal 模式读取时有用,多版本读取时,不存在列类型的 + */ +public enum ColumnType { + STRING("string"), + BINARY_STRING("binarystring"), + BYTES("bytes"), + BOOLEAN("boolean"), + SHORT("short"), + INT("int"), + LONG("long"), + FLOAT("float"), + DOUBLE("double"), + DATE("date"), + BINARY("binary"); + + private String typeName; + + ColumnType(String typeName) { + this.typeName = typeName; + } + + public static ColumnType getByTypeName(String typeName) { 
+ if (StringUtils.isBlank(typeName)) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ColumnType.class).message("columntype.1", typeName, Arrays.asList(values()))); + } + for (ColumnType columnType : values()) { + if (StringUtils.equalsIgnoreCase(columnType.typeName, typeName.trim())) { + return columnType; + } + } + + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ColumnType.class).message("columntype.1", typeName, Arrays.asList(values()))); + } + + @Override + public String toString() { + return this.typeName; + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Config.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Config.java new file mode 100644 index 0000000000..2a37d5d5ea --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Config.java @@ -0,0 +1,42 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +public interface Config { + + String MEMSTORE_THRESHOLD = "memstoreThreshold"; + + double DEFAULT_MEMSTORE_THRESHOLD = 0.9d; + + String MEMSTORE_CHECK_INTERVAL_SECOND = "memstoreCheckIntervalSecond"; + + long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30; + + String FAIL_TRY_COUNT = "failTryCount"; + + int DEFAULT_FAIL_TRY_COUNT = 10000; + + String WRITER_THREAD_COUNT = "writerThreadCount"; + + int DEFAULT_WRITER_THREAD_COUNT = 5; + + String CONCURRENT_WRITE = "concurrentWrite"; + + boolean DEFAULT_CONCURRENT_WRITE = true; + + String RS_URL = "rsUrl"; + + String OB_VERSION = "obVersion"; + + String TIMEOUT = "timeout"; + + String PRINT_COST = "printCost"; + + boolean DEFAULT_PRINT_COST = false; + + String COST_BOUND = "costBound"; + + long DEFAULT_COST_BOUND = 20; + + String MAX_ACTIVE_CONNECTION = "maxActiveConnection"; + + int DEFAULT_MAX_ACTIVE_CONNECTION = 2000; +} diff --git 
a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigKey.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigKey.java new file mode 100755 index 0000000000..bd06524f3a --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigKey.java @@ -0,0 +1,78 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +public final class ConfigKey { + + public final static String HBASE_CONFIG = "hbaseConfig"; + + public final static String TABLE = "table"; + + public final static String DBNAME = "dbName"; + + public final static String OBCONFIG_URL = "obConfigUrl"; + + public final static String JDBC_URL = "jdbcUrl"; + /** + * mode 可以取 normal 或者 multiVersionFixedColumn 或者 multiVersionDynamicColumn 三个值,无默认值。 + *

+ * normal 配合 column(Map 结构的)使用 + *

+ * multiVersion + */ + public final static String MODE = "mode"; + + public final static String ROWKEY_COLUMN = "rowkeyColumn"; + + public final static String VERSION_COLUMN = "versionColumn"; + + /** + * 默认为 utf8 + */ + public final static String ENCODING = "encoding"; + + public final static String COLUMN = "column"; + + public static final String INDEX = "index"; + + public static final String NAME = "name"; + + public static final String TYPE = "type"; + + public static final String VALUE = "value"; + + public static final String FORMAT = "format"; + + /** + * 默认为 EMPTY_BYTES + */ + public static final String NULL_MODE = "nullMode"; + + public static final String TRUNCATE = "truncate"; + + public static final String AUTO_FLUSH = "autoFlush"; + + public static final String WAL_FLAG = "walFlag"; + + public static final String WRITE_BUFFER_SIZE = "writeBufferSize"; + + public static final String MAX_RETRY_COUNT = "maxRetryCount"; + + public static final String USE_ODP_MODE = "useOdpMode"; + + public static final String OB_SYS_USER = "obSysUser"; + + public static final String OB_SYS_PASSWORD = "obSysPassword"; + + public static final String ODP_HOST = "odpHost"; + + public static final String ODP_PORT = "odpPort"; + + public static final String OBHBASE_HTABLE_CLIENT_WRITE_BUFFER = "obhbaseClientWriteBuffer"; + + public static final String OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK = "obhbaseHtablePutWriteBufferCheck"; + + public static final String WRITE_BUFFER_LOW_MARK = "writeBufferLowMark"; + + public static final String WRITE_BUFFER_HIGH_MARK = "writeBufferHighMark"; + + public static final String TABLE_CLIENT_RPC_EXECUTE_TIMEOUT = "rpcExecuteTimeout"; +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigValidator.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigValidator.java new file mode 100644 index 0000000000..22224d4b2c --- /dev/null +++ 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigValidator.java @@ -0,0 +1,110 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.rdbms.writer.Key; + +import java.nio.charset.Charset; +import java.util.List; + +/** + * Created by johnxu.xj on Sept 30 2018 + */ +public class ConfigValidator { + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ConfigValidator.class); + + public static void validateParameter(com.alibaba.datax.common.util.Configuration originalConfig) { + originalConfig.getNecessaryValue(Key.USERNAME, Hbase094xWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(Key.PASSWORD, Hbase094xWriterErrorCode.REQUIRED_VALUE); +// originalConfig.getNecessaryValue(ConfigKey.OBCONFIG_URL, Hbase094xWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(ConfigKey.TABLE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(ConfigKey.DBNAME, Hbase094xWriterErrorCode.REQUIRED_VALUE); + + ConfigValidator.validateMode(originalConfig); + + String encoding = originalConfig.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING); + if (!Charset.isSupported(encoding)) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.9", encoding)); + } + originalConfig.set(ConfigKey.ENCODING, encoding); + } + + public static void validateMode(com.alibaba.datax.common.util.Configuration originalConfig) { + String mode = originalConfig.getNecessaryValue(ConfigKey.MODE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + ModeType modeType = ModeType.getByTypeName(mode); + if (ModeType.Normal.equals(modeType)) { + validateRowkeyColumn(originalConfig); + validateColumn(originalConfig); + 
validateVersionColumn(originalConfig); + } + + if (originalConfig.getBool(ConfigKey.USE_ODP_MODE)) { + originalConfig.getNecessaryValue(ConfigKey.ODP_HOST, Hbase094xWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(ConfigKey.ODP_PORT, Hbase094xWriterErrorCode.REQUIRED_VALUE); + } else { + originalConfig.getNecessaryValue(ConfigKey.OBCONFIG_URL, Hbase094xWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(ConfigKey.OB_SYS_USER, Hbase094xWriterErrorCode.REQUIRED_VALUE); + } + } + + public static void validateColumn(com.alibaba.datax.common.util.Configuration originalConfig) { + List columns = originalConfig.getListConfiguration(ConfigKey.COLUMN); + if (columns == null || columns.isEmpty()) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.11")); + } + for (Configuration aColumn : columns) { + Integer index = aColumn.getInt(ConfigKey.INDEX); + String type = aColumn.getNecessaryValue(ConfigKey.TYPE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + String name = aColumn.getNecessaryValue(ConfigKey.NAME, Hbase094xWriterErrorCode.REQUIRED_VALUE); + ColumnType.getByTypeName(type); + if (name.split(":").length != 2) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.12", name)); + } + if (index == null || index < 0) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.13")); + } + } + } + + public static void validateRowkeyColumn(com.alibaba.datax.common.util.Configuration originalConfig) { + List rowkeyColumn = originalConfig.getListConfiguration(ConfigKey.ROWKEY_COLUMN); + if (rowkeyColumn == null || rowkeyColumn.isEmpty()) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.14")); + } + int rowkeyColumnSize = rowkeyColumn.size(); + //包含{"index":0,"type":"string"} 
或者 {"index":-1,"type":"string","value":"_"} + for (Configuration aRowkeyColumn : rowkeyColumn) { + Integer index = aRowkeyColumn.getInt(ConfigKey.INDEX); + String type = aRowkeyColumn.getNecessaryValue(ConfigKey.TYPE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + ColumnType.getByTypeName(type); + if (index == null) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.15")); + } + //不能只有-1列,即rowkey连接串 + if (rowkeyColumnSize == 1 && index == -1) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.16")); + } + if (index == -1) { + aRowkeyColumn.getNecessaryValue(ConfigKey.VALUE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + } + } + } + + public static void validateVersionColumn(com.alibaba.datax.common.util.Configuration originalConfig) { + Configuration versionColumn = originalConfig.getConfiguration(ConfigKey.VERSION_COLUMN); + //为null,表示用当前时间;指定列,需要index + if (versionColumn != null) { + Integer index = versionColumn.getInt(ConfigKey.INDEX); + if (index == null) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.17")); + } + if (index == -1) { + //指定时间,需要index=-1,value + versionColumn.getNecessaryValue(ConfigKey.VALUE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + } else if (index < 0) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.18")); + } + } + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Constant.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Constant.java new file mode 100755 index 0000000000..910855953d --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Constant.java @@ -0,0 +1,27 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import 
ch.qos.logback.classic.Level; + +public final class Constant { + public static final String DEFAULT_ENCODING = "UTF-8"; + public static final String DEFAULT_DATA_FORMAT = "yyyy-MM-dd HH:mm:ss"; + public static final String DEFAULT_NULL_MODE = "skip"; + public static final long DEFAULT_WRITE_BUFFER_SIZE = 8 * 1024 * 1024; + public static final long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30; + public static final double DEFAULT_MEMSTORE_THRESHOLD = 0.9d; + public static final int DEFAULT_FAIL_TRY_COUNT = 10000; + public static final String OB_TABLE_CLIENT_PROPERTY = "logging.path.com.alipay.oceanbase-table-client"; + public static final String OB_TABLE_HBASE_PROPERTY = "logging.path.com.alipay.oceanbase-table-hbase"; + public static final String OB_TABLE_CLIENT_LOG_LEVEL = "logging.level.oceanbase-table-client"; + public static final String OB_TABLE_HBASE_LOG_LEVEL = "logging.level.oceanbase-table-hbase"; + public static final String OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-client"; + public static final String OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-hbase"; + public static final String OB_HBASE_LOG_PATH = System.getProperty("datax.home") + "/log/"; + public static final String DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL = Level.OFF.toString(); + public static final String DEFAULT_OB_TABLE_HBASE_LOG_LEVEL = Level.OFF.toString(); + public static final String DEFAULT_NETTY_BUFFER_LOW_WATERMARK = Integer.toString(512 * 1024); + public static final String DEFAULT_NETTY_BUFFER_HIGH_WATERMARK = Integer.toString(1024 * 1024); + public static final String DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER = "2097152"; + public static final String DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK = "10"; + public static final String DEFAULT_RPC_EXECUTE_TIMEOUT = "3000"; +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Hbase094xWriterErrorCode.java 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Hbase094xWriterErrorCode.java new file mode 100644 index 0000000000..08529c378c --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Hbase094xWriterErrorCode.java @@ -0,0 +1,44 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import com.alibaba.datax.common.spi.ErrorCode; +import com.alibaba.datax.common.util.MessageSource; + +/** + * Created by shf on 16/3/8. + */ +public enum Hbase094xWriterErrorCode implements ErrorCode { + REQUIRED_VALUE("Hbasewriter-00", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.required_value")), + ILLEGAL_VALUE("Hbasewriter-01", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.illegal_value")), + GET_HBASE_CONFIG_ERROR("Hbasewriter-02", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.get_hbase_config_error")), + GET_HBASE_TABLE_ERROR("Hbasewriter-03", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.get_hbase_table_error")), + CLOSE_HBASE_AMIN_ERROR("Hbasewriter-05", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.close_hbase_amin_error")), + CLOSE_HBASE_TABLE_ERROR("Hbasewriter-06", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.close_hbase_table_error")), + PUT_HBASE_ERROR("Hbasewriter-07", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.put_hbase_error")), + DELETE_HBASE_ERROR("Hbasewriter-08", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.delete_hbase_error")), + TRUNCATE_HBASE_ERROR("Hbasewriter-09", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.truncate_hbase_error")), + CONSTRUCT_ROWKEY_ERROR("Hbasewriter-10", 
MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.construct_rowkey_error")), + CONSTRUCT_VERSION_ERROR("Hbasewriter-11", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.construct_version_error")), + INIT_ERROR("Hbasewriter-12", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.init_error")); + private final String code; + private final String description; + + private Hbase094xWriterErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s].", this.code, this.description); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_en_US.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_en_US.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_ja_JP.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_CN.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_HK.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_TW.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ModeType.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ModeType.java new file mode 100644 index 0000000000..592a59a88f --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ModeType.java @@ -0,0 +1,30 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import java.util.Arrays; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.MessageSource; + +public enum ModeType { + Normal("normal"), + MultiVersion("multiVersion"); + + private String mode; + + ModeType(String mode) { + this.mode = mode.toLowerCase(); + } + + public String getMode() { + return mode; + } + + public static ModeType getByTypeName(String modeName) { + for (ModeType modeType : values()) { + if (modeType.mode.equalsIgnoreCase(modeName)) { + return modeType; + } + } + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ModeType.class).message("modetype.1", modeName, Arrays.asList(values()))); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/NullModeType.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/NullModeType.java new file mode 100644 index 0000000000..6514a1a44a --- /dev/null +++ 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/NullModeType.java @@ -0,0 +1,30 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import java.util.Arrays; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.MessageSource; + +public enum NullModeType { + Skip("skip"), + Empty("empty"); + + private String mode; + + NullModeType(String mode) { + this.mode = mode.toLowerCase(); + } + + public String getMode() { + return mode; + } + + public static NullModeType getByTypeName(String modeName) { + for (NullModeType modeType : values()) { + if (modeType.mode.equalsIgnoreCase(modeName)) { + return modeType; + } + } + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(NullModeType.class).message("nullmodetype.1", modeName, Arrays.asList(values()))); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHTableInfo.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHTableInfo.java new file mode 100644 index 0000000000..80b15ae9cd --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHTableInfo.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to + * the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more + * details. 
+ */ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.reader.Key; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.commons.lang3.tuple.Triple; + +/** + * @author cjyyz + * @date 2023/03/24 + * @since + */ +public class ObHTableInfo { + + /** + * 不带列族的表名,用于构建OHTable + */ + String tableName; + + /** + * 带列族的表名,用于分区计算 + */ + String fullHbaseTableName; + + NullModeType nullModeType; + + String encoding; + + List columns; + + /** + * 记录配置文件中的columns的列族名,字段名,字段类型,避免每次执行插入都解析 + * Triple left : 列族名;middle : 字段名;right:字段类型 + */ + LinkedHashMap> indexColumnInfoMap; + + /** + * 记录配置文件中rowKey的Index,常量值,字段类型,避免每次执行插入都解析 + * Triple left : Index;middle : 常量值;right:字段类型 + */ + List> rowKeyElementList; + + public ObHTableInfo(Configuration configuration) { + this.nullModeType = NullModeType.getByTypeName(configuration.getString(ConfigKey.NULL_MODE, Constant.DEFAULT_NULL_MODE)); + this.encoding = configuration.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING); + this.columns = configuration.getListConfiguration(ConfigKey.COLUMN); + this.indexColumnInfoMap = new LinkedHashMap<>(); + configuration.getListConfiguration(ConfigKey.COLUMN).forEach(e -> { + String[] name = e.getString(ConfigKey.NAME).split(":"); + indexColumnInfoMap.put(e.getInt(ConfigKey.INDEX), Triple.of(name[0], name[1], ColumnType.getByTypeName(e.getString(ConfigKey.TYPE))) + ); + }); + + this.rowKeyElementList = new ArrayList<>(); + configuration.getListConfiguration(ConfigKey.ROWKEY_COLUMN).forEach(e -> { + Integer index = e.getInt(ConfigKey.INDEX); + String constantValue = e.getString(ConfigKey.VALUE); + ColumnType columnType = ColumnType.getByTypeName(e.getString(ConfigKey.TYPE)); + rowKeyElementList.add(Triple.of(index, constantValue, columnType)); + + }); + + this.tableName = configuration.getString(Key.TABLE); + 
this.fullHbaseTableName = tableName; + if (!fullHbaseTableName.contains("$")) { + String name = columns.get(0).getString(ConfigKey.NAME); + String familyName = name.split(":")[0]; + fullHbaseTableName = fullHbaseTableName + "$" + familyName; + } + } + + public String getTableName() { + return tableName; + } + + public String getFullHbaseTableName() { + return fullHbaseTableName; + } + + public NullModeType getNullModeType() { + return nullModeType; + } + + public String getEncoding() { + return encoding; + } + + public Map> getIndexColumnInfoMap() { + return indexColumnInfoMap; + } + + public List> getRowKeyElementList() { + return rowKeyElementList; + } +} \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHbaseWriter.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHbaseWriter.java new file mode 100644 index 0000000000..555ce83638 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHbaseWriter.java @@ -0,0 +1,267 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_OB_TABLE_HBASE_LOG_LEVEL; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_HBASE_LOG_PATH; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_CLIENT_PROPERTY; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_HBASE_LOG_LEVEL; +import static 
com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_HBASE_PROPERTY; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.reader.util.ObVersion; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter; +import com.alibaba.datax.plugin.rdbms.writer.Key; +import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo; +import com.alibaba.datax.plugin.writer.obhbasewriter.task.ObHBaseWriteTask; +import com.google.common.base.Preconditions; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; + +/** + * + */ +public class ObHbaseWriter extends Writer { + /** + * Job 中的方法仅执行一次,Task 中方法会由框架启动多个 Task 线程并行执行。 + *

+ * 整个 Writer 执行流程是: + * + *

+     * Job类init-->prepare-->split
+     *
+     *                          Task类init-->prepare-->startWrite-->post-->destroy
+     *                          Task类init-->prepare-->startWrite-->post-->destroy
+     *
+     *                                                                            Job类post-->destroy
+     * 
+ */ + public static class Job extends Writer.Job { + private Configuration originalConfig = null; + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + + /** + * 注意:此方法仅执行一次。 最佳实践:通常在这里对用户的配置进行校验:是否缺失必填项?有无错误值?有没有无关配置项?... + * 并给出清晰的报错/警告提示。校验通常建议采用静态工具类进行,以保证本类结构清晰。 + */ + @Override + public void init() { + if (System.getProperty(OB_TABLE_CLIENT_PROPERTY) == null) { + LOG.info(OB_TABLE_CLIENT_PROPERTY + " not set"); + System.setProperty(OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH); + } + if (System.getProperty(OB_TABLE_HBASE_PROPERTY) == null) { + LOG.info(OB_TABLE_HBASE_PROPERTY + " not set"); + System.setProperty(OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH); + } + if (System.getProperty(OB_TABLE_CLIENT_LOG_LEVEL) == null) { + LOG.info(OB_TABLE_CLIENT_LOG_LEVEL + " not set"); + System.setProperty(OB_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL); + } + if (System.getProperty(OB_TABLE_HBASE_LOG_LEVEL) == null) { + LOG.info(OB_TABLE_HBASE_LOG_LEVEL + " not set"); + System.setProperty(OB_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL); + } + if (System.getProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL) == null) { + LOG.info(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL + " not set"); + System.setProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL); + } + if (System.getProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL) == null) { + LOG.info(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL + " not set"); + System.setProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL); + } + + LOG.info("{} is set to {}, {} is set to {}", + OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH, OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH); + this.originalConfig = super.getPluginJobConf(); + boolean useOdpMode = originalConfig.getBool(ConfigKey.USE_ODP_MODE, false); + String configUrl = originalConfig.getString(ConfigKey.OBCONFIG_URL, null); + String jdbcUrl = originalConfig.getString(ConfigKey.JDBC_URL, null); + jdbcUrl = 
DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl); + String user = originalConfig.getString(Key.USERNAME, null); + String password = originalConfig.getString(Key.PASSWORD); + ServerConnectInfo serverConnectInfo = new ServerConnectInfo(jdbcUrl, user, password); + if (useOdpMode) { + originalConfig.set(ConfigKey.ODP_HOST, serverConnectInfo.host); + originalConfig.set(ConfigKey.ODP_PORT, serverConnectInfo.port); + } else if (StringUtils.isBlank(configUrl)) { + serverConnectInfo.setSysUser(originalConfig.getString(ConfigKey.OB_SYS_USER)); + serverConnectInfo.setSysPass(originalConfig.getString(ConfigKey.OB_SYS_PASSWORD)); + try { + originalConfig.set(ConfigKey.OBCONFIG_URL, queryRsUrl(serverConnectInfo)); + originalConfig.set(ConfigKey.OB_SYS_USER, serverConnectInfo.sysUser); + originalConfig.set(ConfigKey.OB_SYS_PASSWORD, serverConnectInfo.sysPass); + LOG.info("fetch configUrl success, configUrl is {}", configUrl); + } catch (Exception e) { + LOG.error("fail to get configure url: " + e.getMessage()); + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, "Missing obConfigUrl"); + } + } + if (StringUtils.isBlank(originalConfig.getString(ConfigKey.DBNAME))) { + originalConfig.set(ConfigKey.DBNAME, serverConnectInfo.databaseName); + } + ConfigValidator.validateParameter(this.originalConfig); + } + + private String queryRsUrl(ServerConnectInfo serverInfo) { + String configUrl = originalConfig.getString(ConfigKey.OBCONFIG_URL, null); + if (configUrl == null) { + try { + Connection conn = null; + int retry = 0; + final String sysJDBCUrl = serverInfo.jdbcUrl.replace(serverInfo.databaseName, "oceanbase"); + do { + try { + if (retry > 0) { + int sleep = retry > 9 ? 
500 : 1 << retry; + try { + TimeUnit.SECONDS.sleep(sleep); + } catch (InterruptedException e) { + } + LOG.warn("retry fetch RsUrl the {} times", retry); + } + conn = DBUtil.getConnection(DataBaseType.OceanBase, sysJDBCUrl, serverInfo.sysUser, serverInfo.sysPass); + String sql = "show parameters like 'obconfig_url'"; + LOG.info("query param: {}", sql); + PreparedStatement stmt = conn.prepareStatement(sql); + ResultSet result = stmt.executeQuery(); + if (result.next()) { + configUrl = result.getString("Value"); + } + if (StringUtils.isNotBlank(configUrl)) { + break; + } + } catch (Exception e) { + ++retry; + LOG.warn("fetch root server list(rsList) error {}", e.getMessage()); + } finally { + DBUtil.closeDBResources(null, conn); + } + } while (retry < 3); + + LOG.info("configure url is: " + configUrl); + originalConfig.set(ConfigKey.OBCONFIG_URL, configUrl); + } catch (Exception e) { + LOG.error("Fail to get configure url: {}", e.getMessage(), e); + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, "未配置obConfigUrl,且无法获取obConfigUrl"); + } + } + return configUrl; + } + + /** + * 注意:此方法仅执行一次。 最佳实践:如果 Job 中有需要进行数据同步之前的处理,可以在此处完成,如果没有必要则可以直接去掉。 + */ + // 一般来说,是需要推迟到 task 中进行pre 的执行(单表情况例外) + @Override + public void prepare() { + } + + /** + * 注意:此方法仅执行一次。 最佳实践:通常采用工具静态类完成把 Job 配置切分成多个 Task 配置的工作。 这里的 + * mandatoryNumber 是强制必须切分的份数。 + */ + @Override + public List split(int mandatoryNumber) { + // This function does not need any change. 
+ Configuration simplifiedConf = this.originalConfig; + + List splitResultConfigs = new ArrayList(); + for (int j = 0; j < mandatoryNumber; j++) { + splitResultConfigs.add(simplifiedConf.clone()); + } + return splitResultConfigs; + } + + /** + * 注意:此方法仅执行一次。 最佳实践:如果 Job 中有需要进行数据同步之后的后续处理,可以在此处完成。 + */ + @Override + public void post() { + // No post supported + } + + /** + * 注意:此方法仅执行一次。 最佳实践:通常配合 Job 中的 post() 方法一起完成 Job 的资源释放。 + */ + @Override + public void destroy() { + + } + } + + public static class Task extends Writer.Task { + private Configuration taskConfig; + private CommonRdbmsWriter.Task writerTask; + + /** + * 注意:此方法每个 Task 都会执行一次。 最佳实践:此处通过对 taskConfig 配置的读取,进而初始化一些资源为 + * startWrite()做准备。 + */ + @Override + public void init() { + this.taskConfig = super.getPluginJobConf(); + String mode = this.taskConfig.getString(ConfigKey.MODE); + ModeType modeType = ModeType.getByTypeName(mode); + + switch (modeType) { + case Normal: + try { + this.writerTask = new ObHBaseWriteTask(this.taskConfig); + } catch (Exception e) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.INIT_ERROR, "ObHbase writer init error:" + e.getMessage()); + } + break; + default: + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, "ObHbase not support this mode type:" + modeType); + } + } + + /** + * 注意:此方法每个 Task 都会执行一次。 最佳实践:如果 Task + * 中有需要进行数据同步之前的处理,可以在此处完成,如果没有必要则可以直接去掉。 + */ + @Override + public void prepare() { + this.writerTask.prepare(taskConfig); + } + + /** + * 注意:此方法每个 Task 都会执行一次。 最佳实践:此处适当封装确保简洁清晰完成数据写入工作。 + */ + public void startWrite(RecordReceiver recordReceiver) { + this.writerTask.startWrite(recordReceiver, taskConfig, super.getTaskPluginCollector()); + } + + /** + * 注意:此方法每个 Task 都会执行一次。 最佳实践:如果 Task 中有需要进行数据同步之后的后续处理,可以在此处完成。 + */ + @Override + public void post() { + this.writerTask.post(taskConfig); + } + + /** + * 注意:此方法每个 Task 都会执行一次。 最佳实践:通常配合Task 中的 post() 方法一起完成 Task 的资源释放。 + */ + @Override + public void destroy() { + 
this.writerTask.destroy(taskConfig); + } + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings.properties new file mode 100644 index 0000000000..63a53efab7 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_en_US.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_en_US.properties new file mode 100644 index 0000000000..86b6a8b3b8 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_en_US.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=The [table] calculated based on the rules does not exist. The calculated [tableName]={0}, [db]={1}. Please check the rules you configured. diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_ja_JP.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..63a53efab7 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_ja_JP.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. 
\ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_CN.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..63a53efab7 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_CN.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_HK.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..4940a177b6 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_HK.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=\u901a\u904e\u898f\u5247\u8a08\u7b97\u51fa\u4f86\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8acb\u6aa2\u67e5\u60a8\u914d\u7f6e\u7684\u898f\u5247.
diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_TW.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..4940a177b6 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_TW.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.databasewriterbuffer.1=通過規則計算出來的table不存在, 算出的tableName={0},db={1}, 請檢查您配置的規則. diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObDataSourceErrorCode.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObDataSourceErrorCode.java new file mode 100644 index 0000000000..5e656a6e41 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObDataSourceErrorCode.java @@ -0,0 +1,30 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.ext; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum ObDataSourceErrorCode implements ErrorCode { + DESC("ObDataSourceError code", "connect error"); + + private final String code; + private final String describe; + + private ObDataSourceErrorCode(String code, String describe) { + this.code = code; + this.describe = describe; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.describe; + } + + @Override + public String toString() { + return String.format("Code:[%s], Describe:[%s]. 
", this.code, this.describe); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObHbaseTableHolder.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObHbaseTableHolder.java new file mode 100644 index 0000000000..7b74b9479c --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObHbaseTableHolder.java @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to + * the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more + * details. + */ +package com.alibaba.datax.plugin.writer.obhbasewriter.ext; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode; +import com.alipay.oceanbase.hbase.OHTable; +import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @author cjyyz + * @date 2023/03/16 + * @since + */ +public class ObHbaseTableHolder { + private static final Logger LOG = LoggerFactory.getLogger(ObHbaseTableHolder.class); + + private Configuration configuration; + + private String hbaseTableName; + + private OHTable ohTable; + + public ObHbaseTableHolder(Configuration configuration, String hbaseTableName) { + this.configuration = configuration; + this.hbaseTableName = hbaseTableName; + } + + public OHTable getOhTable() { + try { + if (ohTable == null) { + ohTable = new OHTable(configuration, hbaseTableName); + } + return ohTable; + } catch (Exception e) { + LOG.error("build obHTable: {} failed. 
reason: {}", hbaseTableName, e.getMessage()); + throw DataXException.asDataXException(Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR, Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR.getDescription()); + } + } + + public void destroy() { + try { + if (ohTable != null) { + ohTable.close(); + } + } catch (Exception e) { + LOG.warn("error in closing htable: {}. Reason: {}", hbaseTableName, e.getMessage()); + } + } +} \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ServerConnectInfo.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ServerConnectInfo.java new file mode 100644 index 0000000000..80c2d0d9f7 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ServerConnectInfo.java @@ -0,0 +1,146 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.ext; + +import com.google.common.base.Preconditions; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import static org.apache.commons.lang3.StringUtils.EMPTY; + +public class ServerConnectInfo { + + public String clusterName; + public String tenantName; + // userName doesn't contain tenantName or clusterName + public String userName; + public String password; + public String databaseName; + public String ipPort; + public String jdbcUrl; + public String host; + public String port; + public boolean publicCloud; + public int rpcPort; + public String sysUser; + public String sysPass; + + /** + * + * @param jdbcUrl format is jdbc:oceanbase//ip:port + * @param username format is cluster:tenant:username or username@tenant#cluster or user@tenant or user + * @param password + */ + public ServerConnectInfo(final String jdbcUrl, final String username, final String password) { + this(jdbcUrl, username, password, null, null); + } + + public ServerConnectInfo(final String jdbcUrl, final String username, final String password, final String sysUser, final String sysPass) { + 
if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) { + String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN); + Preconditions.checkArgument(ss.length == 3, "jdbc url format is not correct:" + jdbcUrl); + this.userName = username; + this.clusterName = ss[1].trim().split(":")[0]; + this.tenantName = ss[1].trim().split(":")[1]; + this.jdbcUrl = ss[2]; + } else { + this.jdbcUrl = jdbcUrl; + } + this.password = password; + this.sysUser = sysUser; + this.sysPass = sysPass; + parseJdbcUrl(jdbcUrl); + parseFullUserName(username); + } + + private void parseJdbcUrl(final String jdbcUrl) { + Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w-]+)\\?"); + Matcher matcher = pattern.matcher(jdbcUrl); + if (matcher.find()) { + String ipPort = matcher.group(1); + String dbName = matcher.group(2); + this.ipPort = ipPort; + String[] hostPort = ipPort.split(":"); + this.host = hostPort[0]; + this.port = hostPort[1]; + this.databaseName = dbName; + this.publicCloud = host.endsWith("aliyuncs.com"); + } else { + throw new RuntimeException("Invalid argument:" + jdbcUrl); + } + } + + private void parseFullUserName(final String fullUserName) { + int tenantIndex = fullUserName.indexOf("@"); + int clusterIndex = fullUserName.indexOf("#"); + // 适用于jdbcUrl以||_dsc_ob10_dsc_开头的场景 + if (fullUserName.contains(":") && tenantIndex < 0) { + String[] names = fullUserName.split(":"); + if (names.length != 3) { + throw new RuntimeException("invalid argument: " + fullUserName); + } else { + this.clusterName = names[0]; + this.tenantName = names[1]; + this.userName = names[2]; + } + } else if (tenantIndex < 0) { + // 适用于short jdbcUrl,且username中不含租户名(主要是公有云场景,此场景下不计算分区) + this.userName = fullUserName; + this.clusterName = EMPTY; + this.tenantName = EMPTY; + } else { + // 适用于short jdbcUrl,且username中含租户名 + this.userName = fullUserName.substring(0, tenantIndex); + if (clusterIndex < 0) { + this.clusterName = 
EMPTY; + this.tenantName = fullUserName.substring(tenantIndex + 1); + } else { + this.clusterName = fullUserName.substring(clusterIndex + 1); + this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex); + } + } + } + + @Override + public String toString() { + return "ServerConnectInfo{" + + "clusterName='" + clusterName + '\'' + + ", tenantName='" + tenantName + '\'' + + ", userName='" + userName + '\'' + + ", password='" + password + '\'' + + ", databaseName='" + databaseName + '\'' + + ", ipPort='" + ipPort + '\'' + + ", jdbcUrl='" + jdbcUrl + '\'' + + ", publicCloud=" + publicCloud + + ", rpcPort=" + rpcPort + + '}'; + } + + public String getFullUserName() { + StringBuilder builder = new StringBuilder(); + builder.append(userName); + if (publicCloud || (rpcPort != 0 && EMPTY.equals(clusterName))) { + return builder.toString(); + } + if (!EMPTY.equals(tenantName)) { + builder.append("@").append(tenantName); + } + + if (!EMPTY.equals(clusterName)) { + builder.append("#").append(clusterName); + } + if (EMPTY.equals(this.clusterName) && EMPTY.equals(this.tenantName)) { + return this.userName; + } + return builder.toString(); + } + + public void setRpcPort(int rpcPort) { + this.rpcPort = rpcPort; + } + + public void setSysUser(String sysUser) { + this.sysUser = sysUser; + } + + public void setSysPass(String sysPass) { + this.sysPass = sysPass; + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings.properties new file mode 100644 index 0000000000..d41f6151e9 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings.properties @@ -0,0 +1,21 @@ +multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e 
+multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301 +multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219. 
+multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0},ErrorCode:{1} +multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0},ErrorCode:{1} +multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1} +multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1} +multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2} +multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a: + + +singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0},ErrorCode:{1} +singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0},ErrorCode:{1} +singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1} +singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1} \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_en_US.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_en_US.properties new file mode 100644 index 0000000000..010db531ac --- /dev/null +++ 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_en_US.properties @@ -0,0 +1,21 @@ +multitablewritertask.1=The configured [tableList] contains multiple tables but no table splitting rules have been configured. Please check your configuration. +multitablewritertask.2=There are repeated table names in the multiple tables you configured, but no database or table splitting rules have been configured. Please check your configuration. +multitablewritertask.3=All configured tables share the same name, but no database splitting rules have been configured. Please check your configuration. +multitablewritertask.4=The configured table and database share the same name. This back-to-source method is not supported. +multitablewritertask.5=Error in column configuration information. In your configured tasks, the number of source fields to be read: {0} and the number of fields to be written to the target table: {1} are not equivalent. Please check your configuration and make corrections. +multitablewritertask.6=The database that corresponds to the [tableName] calculated based on the rules does not exist. The [tableName]={0}. Please check the rules you configured. +multitablewritertask.7=The database and [table] calculated based on the rules do not exist. The calculated [dbName]={0}, and [tableName]={1}. Please check the rules you configured. +multitablewritertask.8=The database calculated based on the rules does not exist. The calculated [dbName]={0}. Please check the rules you configured. +multitablewritertask.9=The [dbName] [{0}] calculated based on the rules contains multiple sub-tables. Please configure your table splitting rules. +multitablewritertask.10=Fatal exception in OB. Roll back this write and hibernate for five minutes. SQLState: {0}. ErrorCode: {1} +multitablewritertask.11=Recoverable exception in OB. Roll back this write and hibernate for one minute. SQLState: {0}. ErrorCode: {1} +multitablewritertask.12=Exception in OB. 
Roll back this write and hibernate for one second. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1} +multitablewritertask.13=Exception in OB. Roll back this write. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1} +multitablewritertask.14=Failed to write to table: [{0}]. Hibernate for [{1}] milliseconds. Data: {2} +multitablewritertask.15=writing table [{0}] contains dirty data. Record={1}. Writing exception is: + + +singletablewritertask.1=Fatal exception in OB. Roll back this write and hibernate for five minutes. SQLState: {0}. ErrorCode: {1} +singletablewritertask.2=Recoverable exception in OB. Roll back this write and hibernate for one minute. SQLState: {0}. ErrorCode: {1} +singletablewritertask.3=Exception in OB. Roll back this write and hibernate for one second. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1} +singletablewritertask.4=Exception in OB. Roll back this write. Write and submit the records one by one. SQLState: {0}. 
ErrorCode: {1} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_ja_JP.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..12e3e481f8 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_ja_JP.properties @@ -0,0 +1,21 @@ +multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301 +multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. 
+multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219. +multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2} +multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a: + + +singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_CN.properties 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..12e3e481f8 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_CN.properties @@ -0,0 +1,21 @@ +multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301 +multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. 
+multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219. +multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2} +multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a: + + +singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_HK.properties 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..4c0f8e55bc --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_HK.properties @@ -0,0 +1,41 @@ +multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301 +multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. 
+multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219. +multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2} +multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a: + + +singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.1=配置的tableList為多表,但未配置分表規則,請檢查您的配置 +multitablewritertask.2=配置的多庫中的表名有重複的,但未配置分庫規則和分表規則,請檢查您的配置 
+multitablewritertask.3=配置的所有表名都相同,但未配置分庫規則,請檢查您的配置 +multitablewritertask.4=配置的table和db名稱都相同,此種回流方式不支援 +multitablewritertask.5=列配置資訊有錯誤. 因為您配置的任務中,源頭讀取欄位數:{0}與 目的表要寫入的欄位數:{1}不相等. 請檢查您的配置並作出修改. +multitablewritertask.6=通過規則計算出來的tableName查找對應的db不存在,tableName={0}, 請檢查您配置的規則. +multitablewritertask.7=通過規則計算出來的db和table不存在,算出的dbName={0},tableName={1}, 請檢查您配置的規則. +multitablewritertask.8=通過規則計算出來的db不存在,算出的dbName={0}, 請檢查您配置的規則. +multitablewritertask.9=通過規則計算出來的dbName[{0}], 存在多張分表,請配置您的分表規則. +multitablewritertask.10=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0} +multitablewritertask.11=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0} +multitablewritertask.12=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0} +multitablewritertask.13=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0} +multitablewritertask.14=寫入表[{0}]失敗,休眠[{1}]毫秒,數據:{2} +multitablewritertask.15=寫入表[{0}]存在髒數據,record={1}, 寫入異常為: + + +singletablewritertask.1=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0} +singletablewritertask.2=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0} +singletablewritertask.3=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0} +singletablewritertask.4=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_TW.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..4c0f8e55bc --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_TW.properties @@ -0,0 +1,41 @@ +multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e 
+multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301 +multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219. 
+multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2} +multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a: + + +singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.1=配置的tableList為多表,但未配置分表規則,請檢查您的配置 +multitablewritertask.2=配置的多庫中的表名有重複的,但未配置分庫規則和分表規則,請檢查您的配置 +multitablewritertask.3=配置的所有表名都相同,但未配置分庫規則,請檢查您的配置 +multitablewritertask.4=配置的table和db名稱都相同,此種回流方式不支援 +multitablewritertask.5=列配置資訊有錯誤. 因為您配置的任務中,源頭讀取欄位數:{0}與 目的表要寫入的欄位數:{1}不相等. 請檢查您的配置並作出修改. +multitablewritertask.6=通過規則計算出來的tableName查找對應的db不存在,tableName={0}, 請檢查您配置的規則. +multitablewritertask.7=通過規則計算出來的db和table不存在,算出的dbName={0},tableName={1}, 請檢查您配置的規則. 
+multitablewritertask.8=通過規則計算出來的db不存在,算出的dbName={0}, 請檢查您配置的規則. +multitablewritertask.9=通過規則計算出來的dbName[{0}], 存在多張分表,請配置您的分表規則. +multitablewritertask.10=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0} +multitablewritertask.11=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0} +multitablewritertask.12=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0} +multitablewritertask.13=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0} +multitablewritertask.14=寫入表[{0}]失敗,休眠[{1}]毫秒,數據:{2} +multitablewritertask.15=寫入表[{0}]存在髒數據,record={1}, 寫入異常為: + + +singletablewritertask.1=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0} +singletablewritertask.2=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0} +singletablewritertask.3=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0} +singletablewritertask.4=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/MultiVersionWriteTask.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/MultiVersionWriteTask.java new file mode 100644 index 0000000000..4e400b060c --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/MultiVersionWriteTask.java @@ -0,0 +1,12 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.task; + +import com.alibaba.datax.common.util.Configuration; + +/** + * TODO(yuez)升级hbase api之后再补充暂时用不到 + */ +public class MultiVersionWriteTask extends ObHBaseWriteTask{ + public MultiVersionWriteTask(Configuration configuration) throws Exception { + super(configuration); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/NormalWriteTask.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/NormalWriteTask.java new file mode 100644 index 0000000000..3113c022ce --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/NormalWriteTask.java @@ -0,0 +1,12 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.task; + +import 
com.alibaba.datax.common.util.Configuration; + +/** + * TODO(yuez) 升级hbase api之后再补充暂时用不到 + */ +public class NormalWriteTask extends ObHBaseWriteTask{ + public NormalWriteTask(Configuration configuration) throws Exception { + super(configuration); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/ObHBaseWriteTask.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/ObHBaseWriteTask.java new file mode 100644 index 0000000000..d424f6eac2 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/ObHBaseWriteTask.java @@ -0,0 +1,317 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.task; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.rdbms.reader.Key; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter; +import com.alibaba.datax.plugin.writer.obhbasewriter.Config; +import com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey; +import com.alibaba.datax.plugin.writer.obhbasewriter.Constant; +import com.alibaba.datax.plugin.writer.obhbasewriter.NullModeType; +import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo; +import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo; +import com.google.common.collect.Lists; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import 
java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ObHBaseWriteTask extends CommonRdbmsWriter.Task { + private final static MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ObHBaseWriteTask.class); + private final static Logger LOG = LoggerFactory.getLogger(ObHBaseWriteTask.class); + + public NullModeType nullMode = null; + private int maxRetryCount; + + public List columns; + public List rowkeyColumn; + public Configuration versionColumn; + + public String hbaseTableName; + public String encoding; + public Boolean walFlag; + + String configUrl; + String dbName; + String ip; + String port; + + String fullUserName; + boolean usdOdpMode; + String sysUsername; + String sysPassword; + private ObHTableInfo obHTableInfo; + + private ConcurrentTableWriter concurrentWriter; + private boolean allTaskInQueue = false; + private long startTime = 0; + private String threadName = Thread.currentThread().getName(); + + private Lock lock = new ReentrantLock(); + private Condition condition = lock.newCondition(); + + public ObHBaseWriteTask(Configuration configuration) { + super(DataBaseType.MySql); + init(configuration); + } + + @Override + public void init(com.alibaba.datax.common.util.Configuration configuration) { + this.obHTableInfo = new ObHTableInfo(configuration); + this.hbaseTableName = configuration.getString(ConfigKey.TABLE); + this.columns = configuration.getListConfiguration(ConfigKey.COLUMN); + this.rowkeyColumn = configuration.getListConfiguration(ConfigKey.ROWKEY_COLUMN); + this.versionColumn = configuration.getConfiguration(ConfigKey.VERSION_COLUMN); + this.encoding = configuration.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING); + this.nullMode = 
NullModeType.getByTypeName(configuration.getString(ConfigKey.NULL_MODE, Constant.DEFAULT_NULL_MODE)); +// this.memstoreThreshold = configuration.getDouble(Config.MEMSTORE_THRESHOLD, Config.DEFAULT_MEMSTORE_THRESHOLD); + this.walFlag = configuration.getBool(ConfigKey.WAL_FLAG, true); + this.maxRetryCount = configuration.getInt(ConfigKey.MAX_RETRY_COUNT, 3); + + // default 1000 rows are committed together + this.batchSize = com.alibaba.datax.plugin.rdbms.writer.Constant.DEFAULT_BATCH_SIZE; + this.batchByteSize = com.alibaba.datax.plugin.rdbms.writer.Constant.DEFAULT_BATCH_BYTE_SIZE; + + this.configUrl = configuration.getString(ConfigKey.OBCONFIG_URL); + this.jdbcUrl = configuration.getString(ConfigKey.JDBC_URL); + this.username = configuration.getString(Key.USERNAME); + this.password = configuration.getString(Key.PASSWORD); + this.dbName = configuration.getString(Key.DBNAME); + this.usdOdpMode = configuration.getBool(ConfigKey.USE_ODP_MODE); + + ServerConnectInfo connectInfo = new ServerConnectInfo(jdbcUrl, username, password); + String clusterName = connectInfo.clusterName; + this.fullUserName = connectInfo.getFullUserName(); + final String[] ipPort = connectInfo.ipPort.split(":"); + if (usdOdpMode) { + this.ip = ipPort[0]; + this.port = ipPort[1]; + } else { + this.sysUsername = configuration.getString(ConfigKey.OB_SYS_USER); + this.sysPassword = configuration.getString(ConfigKey.OB_SYS_PASSWORD); + connectInfo.setSysUser(sysUsername); + connectInfo.setSysPass(sysPassword); + if (!configUrl.contains("ObRegion")) { + if (configUrl.contains("?")) { + configUrl += "&ObRegion=" + clusterName; + } else { + configUrl += "?ObRegion=" + clusterName; + } + } + if (!configUrl.contains("database")) { + configUrl += "&database=" + dbName; + } + } + if (null == concurrentWriter) { + concurrentWriter = new ConcurrentTableWriter(configuration, connectInfo); + allTaskInQueue = false; + } + } + + @Override + public void prepare(Configuration configuration) { + 
concurrentWriter.start(); + } + + @Override + public void startWrite(RecordReceiver recordReceiver, Configuration configuration, TaskPluginCollector taskPluginCollector) { + this.taskPluginCollector = taskPluginCollector; + int recordCount = 0; + int bufferBytes = 0; + List records = new ArrayList<>(); + try { + Record record; + while ((record = recordReceiver.getFromReader()) != null) { + recordCount++; + bufferBytes += record.getMemorySize(); + records.add(record); + // 按照指定的批大小进行批量写入 + if (records.size() >= batchSize || bufferBytes >= batchByteSize) { + concurrentWriter.addBatchRecords(Lists.newArrayList(records)); + records.clear(); + bufferBytes = 0; + } + } + + if (!records.isEmpty()) { + concurrentWriter.addBatchRecords(records); + } + } catch (Throwable e) { + LOG.warn("startWrite error unexpected ", e); + throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + LOG.info(recordCount + " rows received."); + waitTaskFinish(); + } + + public void waitTaskFinish() { + this.allTaskInQueue = true; + LOG.info("ConcurrentTableWriter has put all task in queue, queueSize = {}, total = {}, finished = {}", + concurrentWriter.getTaskQueueSize(), + concurrentWriter.getTotalTaskCount(), + concurrentWriter.getFinishTaskCount()); + + lock.lock(); + try { + while (!concurrentWriter.checkFinish()) { + condition.await(50, TimeUnit.MILLISECONDS); + // print statistic + LOG.debug("Statistic total task {}, finished {}, queue Size {}", + concurrentWriter.getTotalTaskCount(), + concurrentWriter.getFinishTaskCount(), + concurrentWriter.getTaskQueueSize()); + concurrentWriter.printStatistics(); + } + } catch (InterruptedException e) { + LOG.warn("Concurrent table writer wait task finish interrupt"); + } finally { + lock.unlock(); + } + LOG.debug("wait all InsertTask finished ..."); + } + + public boolean isFinished() { + return allTaskInQueue && concurrentWriter.checkFinish(); + } + + public void singalTaskFinish() { + lock.lock(); + try { + 
condition.signal(); + } finally { + lock.unlock(); + } + } + + public void collectDirtyRecord(Record record, Throwable throwable) { + this.taskPluginCollector.collectDirtyRecord(record, throwable); + } + + @Override + public void post(Configuration configuration) { + + } + + @Override + public void destroy(Configuration configuration) { + if (concurrentWriter != null) { + concurrentWriter.destory(); + } + super.destroy(configuration); + } + + public class ConcurrentTableWriter { + private BlockingQueue> queue; + private List putTasks; + private Configuration config; + private AtomicLong totalTaskCount; + private AtomicLong finishTaskCount; + private ServerConnectInfo connectInfo; + private ExecutorService executorService; + private final int threadCount; + + public ConcurrentTableWriter(Configuration config, ServerConnectInfo connectInfo) { + this.threadCount = config.getInt(Config.WRITER_THREAD_COUNT, Config.DEFAULT_WRITER_THREAD_COUNT); + this.queue = new LinkedBlockingQueue>(threadCount << 1); + this.putTasks = new ArrayList(threadCount); + this.config = config; + this.totalTaskCount = new AtomicLong(0); + this.finishTaskCount = new AtomicLong(0); + this.executorService = Executors.newFixedThreadPool(threadCount); + this.connectInfo = connectInfo; + } + + public long getTotalTaskCount() { + return totalTaskCount.get(); + } + + public long getFinishTaskCount() { + return finishTaskCount.get(); + } + + public int getTaskQueueSize() { + return queue.size(); + } + + public void increFinishCount() { + finishTaskCount.incrementAndGet(); + } + + // should check after put all the task in the queue + public boolean checkFinish() { + long finishCount = finishTaskCount.get(); + long totalCount = totalTaskCount.get(); + return finishCount == totalCount; + } + + public synchronized void start() { + for (int i = 0; i < threadCount; ++i) { + LOG.info("start {} insert task.", (i + 1)); + PutTask putTask = new PutTask(threadName, queue, config, connectInfo, obHTableInfo, 
ObHBaseWriteTask.this); + putTask.setWriter(this); + putTasks.add(putTask); + } + for (PutTask task : putTasks) { + executorService.execute(task); + } + } + + public void printStatistics() { + long insertTotalCost = 0; + long insertTotalCount = 0; + for (PutTask task : putTasks) { + insertTotalCost += task.getTotalCost(); + insertTotalCount += task.getPutCount(); + } + long avgCost = 0; + if (insertTotalCount != 0) { + avgCost = insertTotalCost / insertTotalCount; + } + ObHBaseWriteTask.LOG.debug("Put {} times, totalCost {} ms, average {} ms", + insertTotalCount, insertTotalCost, avgCost); + } + + public void addBatchRecords(final List records) throws InterruptedException { + boolean isSucc = false; + while (!isSucc) { + isSucc = queue.offer(records, 5, TimeUnit.MILLISECONDS); + } + totalTaskCount.incrementAndGet(); + } + + public synchronized void destory() { + if (putTasks != null) { + for (PutTask task : putTasks) { + task.setStop(); + task.destroy(); + } + } + destroyExecutor(); + } + + private void destroyExecutor() { + if (executorService != null && !executorService.isShutdown()) { + executorService.shutdown(); + try { + executorService.awaitTermination(0L, TimeUnit.SECONDS); + } catch (InterruptedException var2) { + } + } + } + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/PutTask.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/PutTask.java new file mode 100644 index 0000000000..768772c0ab --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/PutTask.java @@ -0,0 +1,325 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.task; + +import com.alibaba.datax.common.element.DoubleColumn; +import com.alibaba.datax.common.element.LongColumn; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import 
com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.writer.obhbasewriter.ColumnType; +import com.alibaba.datax.plugin.writer.obhbasewriter.Config; +import com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey; +import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode; +import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo; +import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ObHbaseTableHolder; +import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo; +import com.alipay.oceanbase.hbase.constants.OHConstants; +import com.alipay.oceanbase.rpc.property.Property; + +import com.google.common.base.Stopwatch; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Queue; +import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.tuple.Triple; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hbase.client.Put; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.OBHBASE_HTABLE_CLIENT_WRITE_BUFFER; +import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK; +import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.TABLE_CLIENT_RPC_EXECUTE_TIMEOUT; +import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.WRITE_BUFFER_HIGH_MARK; +import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.WRITE_BUFFER_LOW_MARK; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK; +import static 
com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_NETTY_BUFFER_HIGH_WATERMARK; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_NETTY_BUFFER_LOW_WATERMARK; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_RPC_EXECUTE_TIMEOUT; +import static com.alibaba.datax.plugin.writer.obhbasewriter.util.ObHbaseWriterUtils.getColumnByte; +import static com.alibaba.datax.plugin.writer.obhbasewriter.util.ObHbaseWriterUtils.getRowkey; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_HTABLE_CLIENT_WRITE_BUFFER; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_DATABASE; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_FULL_USER_NAME; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PARAM_URL; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PASSWORD; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_USER_NAME; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_PASSWORD; + +public class PutTask implements Runnable { + + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(PutTask.class); + + private static final Logger LOG = LoggerFactory.getLogger(PutTask.class); + + private ObHBaseWriteTask writerTask; + private ObHBaseWriteTask.ConcurrentTableWriter writer; + + private long totalCost = 0; + private long putCount = 0; + private boolean isStop; + + private ObHTableInfo obHTableInfo; + private final Configuration versionColumn; + // 失败重试次数 + private final int failTryCount; + + private String parentThreadName; + private Queue> queue; + private Configuration config; + private ServerConnectInfo connInfo; + + private ObHbaseTableHolder tableHolder; + + private final 
SimpleDateFormat df_second = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + private final SimpleDateFormat df_ms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS"); + + public PutTask(String parentThreadName, Queue> recordsQueue, Configuration config, ServerConnectInfo connectInfo, ObHTableInfo obHTableInfo, ObHBaseWriteTask writerTask) { + this.parentThreadName = parentThreadName; + this.queue = recordsQueue; + this.config = config; + this.connInfo = connectInfo; + this.obHTableInfo = obHTableInfo; + this.writerTask = writerTask; + this.versionColumn = config.getConfiguration(ConfigKey.VERSION_COLUMN); + this.failTryCount = config.getInt(Config.FAIL_TRY_COUNT, Config.DEFAULT_FAIL_TRY_COUNT); + this.isStop = false; + initTableHolder(); + } + + private void initTableHolder() { + try { + org.apache.hadoop.conf.Configuration c = new org.apache.hadoop.conf.Configuration(); + c.set(HBASE_OCEANBASE_FULL_USER_NAME, writerTask.fullUserName); + c.set(HBASE_OCEANBASE_PASSWORD, this.connInfo.password); + c.set(HBASE_OCEANBASE_DATABASE, writerTask.dbName); + // obkv-table-client is needed the code below + if (writerTask.usdOdpMode) { + c.setBoolean(OHConstants.HBASE_OCEANBASE_ODP_MODE, true); + c.set(OHConstants.HBASE_OCEANBASE_ODP_ADDR, connInfo.host); + c.set(OHConstants.HBASE_OCEANBASE_ODP_PORT, connInfo.port); + LOG.info("sysUser and sysPassword is empty, build HTABLE in odp mode."); + } else { + c.set(HBASE_OCEANBASE_PARAM_URL, writerTask.configUrl); + c.set(HBASE_OCEANBASE_SYS_USER_NAME, this.connInfo.sysUser); + c.set(HBASE_OCEANBASE_SYS_PASSWORD, this.connInfo.sysPass); + LOG.info("sysUser and sysPassword is not empty, build HTABLE in sys mode."); + } + c.set(HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK, config.getString(OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK, DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK)); + c.set(HBASE_HTABLE_CLIENT_WRITE_BUFFER, config.getString(OBHBASE_HTABLE_CLIENT_WRITE_BUFFER, DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER)); + + 
c.set(Property.RS_LIST_ACQUIRE_CONNECT_TIMEOUT.getKey(), "500"); + c.set(Property.RS_LIST_ACQUIRE_READ_TIMEOUT.getKey(), "5000"); + c.set(Property.RPC_EXECUTE_TIMEOUT.getKey(), config.getString(TABLE_CLIENT_RPC_EXECUTE_TIMEOUT, DEFAULT_RPC_EXECUTE_TIMEOUT)); + c.set(Property.NETTY_BUFFER_LOW_WATERMARK.getKey(), config.getString(WRITE_BUFFER_LOW_MARK, DEFAULT_NETTY_BUFFER_LOW_WATERMARK)); + c.set(Property.NETTY_BUFFER_HIGH_WATERMARK.getKey(), config.getString(WRITE_BUFFER_HIGH_MARK, DEFAULT_NETTY_BUFFER_HIGH_WATERMARK)); + this.tableHolder = new ObHbaseTableHolder(c, obHTableInfo.getTableName()); + } catch (Exception e) { + LOG.error("init table holder failed, reason: {}", e.getMessage()); + throw new IllegalStateException(e); + } + + } + + private void batchWrite(final List buffer) { + HTableInterface ohTable = null; + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + ohTable = this.tableHolder.getOhTable(); + List puts = buildBatchPutList(buffer); + ohTable.put(puts); + } catch (Exception e) { + if (Objects.isNull(ohTable)) { + LOG.error("build obHTable: {} failed. reason: {}", obHTableInfo.getTableName(), e.getMessage()); + throw DataXException.asDataXException(Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR, Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR.getDescription()); + } + // + LOG.error("hbase batch error: " + e); + // 出错了之后对该出错的batch逐条重试 + for (Record record : buffer) { + writeOneRecord(ohTable, record); + } + } finally { + this.writer.increFinishCount(); + putCount++; + totalCost += stopwatch.elapsed(TimeUnit.MILLISECONDS); + try { + if (!Objects.isNull(ohTable)) { + ohTable.close(); + } + } catch (Exception e) { + LOG.warn("error in closing htable: {}. 
Reason: {}", obHTableInfo.getFullHbaseTableName(), e.getMessage()); + } + } + } + + private void writeOneRecord(HTableInterface ohTable, Record record) { + int retryCount = 0; + while (retryCount < this.failTryCount) { + try { + byte[] rowkey = getRowkey(record, obHTableInfo); + Put put = new Put(rowkey); // row key + boolean hasValidValue = buildPut(put, record); + + if (hasValidValue) { + ohTable.put(put); + } + break; + } catch (Exception e) { + retryCount++; + LOG.error("error in writing: " + e.getMessage() + ", retry count: " + retryCount); + if (retryCount == this.failTryCount) { + LOG.warn("ERROR : record {}", record); + this.writerTask.collectDirtyRecord(record, e); + } + } + } + } + + private List buildBatchPutList(List buffer) { + List puts = new ArrayList<>(); + for (Record record : buffer) { + byte[] rowkey = getRowkey(record, obHTableInfo); + Put put = new org.apache.hadoop.hbase.client.Put(rowkey); // row key + boolean hasValidValue = buildPut(put, record); + if (hasValidValue) { + puts.add(put); + } + } + return puts; + } + + private boolean buildPut(Put put, Record record) { + boolean hasValidValue = false; + long timestamp = buildTimestamp(record); + for (Map.Entry> columnInfo : obHTableInfo.getIndexColumnInfoMap().entrySet()) { + Integer index = columnInfo.getKey(); + if (index >= record.getColumnNumber()) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, + MESSAGE_SOURCE.message("normaltask.2", record.getColumnNumber(), index)); + } + ColumnType columnType = columnInfo.getValue().getRight(); + String familyName = columnInfo.getValue().getLeft(); + String columnName = columnInfo.getValue().getMiddle(); + + byte[] value = getColumnByte(columnType, record.getColumn(index), obHTableInfo); + if (value != null) { + hasValidValue = true; + if (timestamp == -1) { + put.add(familyName.getBytes(), // family + columnName.getBytes(), // Q + value); // V + } else { + put.add(familyName.getBytes(), // family + 
columnName.getBytes(), // Q + timestamp, // timestamp/version + value); // V + } + } + } + + return hasValidValue; + } + + private long buildTimestamp(Record record) { + if (versionColumn == null) { + return -1; + } + + int index = versionColumn.getInt(ConfigKey.INDEX); + long timestamp; + if (index == -1) { + // user specified the constant as timestamp + timestamp = versionColumn.getLong(ConfigKey.VALUE); + if (timestamp < 0) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, + MESSAGE_SOURCE.message("normaltask.4")); + } + } else { + // 指定列作为版本,long/doubleColumn直接record.aslong, 其它类型尝试用yyyy-MM-dd HH:mm:ss, + // yyyy-MM-dd HH:mm:ss SSS去format + if (index >= record.getColumnNumber()) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, + MESSAGE_SOURCE.message("normaltask.5", record.getColumnNumber(), index)); + } + + if (record.getColumn(index).getRawData() == null) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, + MESSAGE_SOURCE.message("normaltask.6")); + } + + if (record.getColumn(index) instanceof LongColumn || record.getColumn(index) instanceof DoubleColumn) { + timestamp = record.getColumn(index).asLong(); + } else { + Date date; + try { + date = df_ms.parse(record.getColumn(index).asString()); + } catch (ParseException e) { + try { + date = df_second.parse(record.getColumn(index).asString()); + } catch (ParseException e1) { + LOG.info(MESSAGE_SOURCE.message("normaltask.7", index)); + throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, e1); + } + } + timestamp = date.getTime(); + } + } + + return timestamp; + } + + public void setStop() {isStop = true;} + + public long getTotalCost() {return totalCost;} + + public long getPutCount() {return putCount;} + + public void destroy() { + tableHolder.destroy(); + } + + void setWriterTask(ObHBaseWriteTask writerTask) { + this.writerTask = writerTask; + } + + 
void setWriter(ObHBaseWriteTask.ConcurrentTableWriter writer) { + this.writer = writer; + } + + @Override + public void run() { + String currentThreadName = String.format("%s-putTask-%d", parentThreadName, Thread.currentThread().getId()); + Thread.currentThread().setName(currentThreadName); + LOG.debug("Task {} start to execute...", currentThreadName); + int sleepTimes = 0; + while (!isStop) { + try { + List records = queue.poll(); + if (null != records) { + batchWrite(records); + } else if (writerTask.isFinished()) { + writerTask.singalTaskFinish(); + LOG.debug("not more task, thread exist ..."); + break; + } else { + TimeUnit.MILLISECONDS.sleep(5); + sleepTimes++; + } + } catch (InterruptedException e) { + LOG.debug("TableWriter is interrupt"); + } catch (Exception e) { + LOG.warn("ERROR UNEXPECTED {}", e); + } + } + LOG.debug("Thread exist..."); + LOG.debug("sleep {} times, total sleep time: {}", sleepTimes, sleepTimes * 5); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_en_US.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_en_US.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_ja_JP.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_CN.properties 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_HK.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_TW.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/ObHbaseWriterUtils.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/ObHbaseWriterUtils.java new file mode 100644 index 0000000000..9ccd3ed84b --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/ObHbaseWriterUtils.java @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to + * the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more + * details. 
+ */ +package com.alibaba.datax.plugin.writer.obhbasewriter.util; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.writer.obhbasewriter.ColumnType; +import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode; +import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo; +import com.alibaba.datax.plugin.writer.obhbasewriter.task.PutTask; +import java.nio.charset.Charset; +import org.apache.commons.lang3.tuple.Triple; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * @author cjyyz + * @date 2023/03/23 + * @since + */ +public class ObHbaseWriterUtils { + + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(PutTask.class); + + public static byte[] getRowkey(Record record, ObHTableInfo obHTableInfo) { + byte[] rowkeyBuffer = {}; + for (Triple rowKeyElement : obHTableInfo.getRowKeyElementList()) { + Integer index = rowKeyElement.getLeft(); + ColumnType columnType = rowKeyElement.getRight(); + if (index == -1) { + String value = rowKeyElement.getMiddle(); + rowkeyBuffer = Bytes.add(rowkeyBuffer, getValueByte(columnType, value, obHTableInfo.getEncoding())); + } else { + if (index >= record.getColumnNumber()) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_ROWKEY_ERROR, MESSAGE_SOURCE.message("normaltask.3", record.getColumnNumber(), index)); + } + byte[] value = getColumnByte(columnType, record.getColumn(index), obHTableInfo); + rowkeyBuffer = Bytes.add(rowkeyBuffer, value); + } + } + + return rowkeyBuffer; + } + + public static byte[] getColumnByte(ColumnType columnType, Column column, ObHTableInfo obHTableInfo) { + byte[] bytes; + if (column.getRawData() != null && !(columnType == ColumnType.STRING && column.asString().equals("null"))) { + 
switch (columnType) { + case INT: + bytes = Bytes.toBytes(column.asLong().intValue()); + break; + case LONG: + bytes = Bytes.toBytes(column.asLong()); + break; + case DOUBLE: + bytes = Bytes.toBytes(column.asDouble()); + break; + case FLOAT: + bytes = Bytes.toBytes(column.asDouble().floatValue()); + break; + case SHORT: + bytes = Bytes.toBytes(column.asLong().shortValue()); + break; + case BOOLEAN: + bytes = Bytes.toBytes(column.asBoolean()); + break; + case STRING: + bytes = getValueByte(columnType, column.asString(), obHTableInfo.getEncoding()); + break; + case BINARY: + bytes = Bytes.toBytesBinary(column.asString()); + break; + default: + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.2", columnType)); + } + } else { + switch (obHTableInfo.getNullModeType()) { + case Skip: + bytes = null; + break; + case Empty: + bytes = HConstants.EMPTY_BYTE_ARRAY; + break; + default: + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.3")); + } + } + return bytes; + } + + /** + * @param columnType + * @param value + * @return byte[] + */ + private static byte[] getValueByte(ColumnType columnType, String value, String encoding) { + byte[] bytes; + if (value != null) { + switch (columnType) { + case INT: + bytes = Bytes.toBytes(Integer.parseInt(value)); + break; + case LONG: + bytes = Bytes.toBytes(Long.parseLong(value)); + break; + case DOUBLE: + bytes = Bytes.toBytes(Double.parseDouble(value)); + break; + case FLOAT: + bytes = Bytes.toBytes(Float.parseFloat(value)); + break; + case SHORT: + bytes = Bytes.toBytes(Short.parseShort(value)); + break; + case BOOLEAN: + bytes = Bytes.toBytes(Boolean.parseBoolean(value)); + break; + case STRING: + bytes = value.getBytes(Charset.forName(encoding)); + break; + default: + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.4", 
columnType)); + } + } else { + bytes = HConstants.EMPTY_BYTE_ARRAY; + } + return bytes; + } +} \ No newline at end of file diff --git a/obhbasewriter/src/main/resources/plugin.json b/obhbasewriter/src/main/resources/plugin.json new file mode 100644 index 0000000000..6ea96196e4 --- /dev/null +++ b/obhbasewriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "obhbasewriter", + "class": "com.alibaba.datax.plugin.writer.obhbasewriter.ObHbaseWriter", + "description": "适用于: 生产环境. 原理: TODO", + "developer": "alibaba" +} diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ExecutorTemplate.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ExecutorTemplate.java new file mode 100644 index 0000000000..a027ed7f35 --- /dev/null +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ExecutorTemplate.java @@ -0,0 +1,287 @@ +package com.alibaba.datax.plugin.reader.oceanbasev10reader.util; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +public class ExecutorTemplate { + + /** + * The default thread pool size. Set as the number of available processors by default. + */ + public static int DEFAULT_POOL_SIZE = Runtime.getRuntime().availableProcessors(); + + /** + * Indicate whether the executor closes automatically. 
/**
 * Small helper around an {@link ExecutorService}: tasks are submitted through a
 * completion service, and {@link #waitForResult()} collects one result per
 * submitted task, failing fast on the first task error.
 *
 * @param <T> result type of the submitted tasks
 */
public class ExecutorTemplate<T> {

    /**
     * The default thread pool size. Set as the number of available processors by default.
     */
    public static int DEFAULT_POOL_SIZE = Runtime.getRuntime().availableProcessors();

    /**
     * Capacity of the work queue used by the executors created by this class.
     */
    private static final int DEFAULT_QUEUE_CAPACITY = 100000;

    /**
     * Indicate whether the executor closes automatically after {@link #waitForResult()}.
     */
    private final boolean autoClose;

    /**
     * Futures of all tasks submitted since the last {@link #clearFutures()}.
     * Wrapped by {@link Collections#synchronizedList}; iteration must lock the list.
     */
    private final List<Future<T>> futures;

    /**
     * The executor that runs the tasks.
     */
    private final ExecutorService internalExecutor;

    private final ExecutorCompletionService<T> completionService;

    /**
     * Set pool size for ExecutorTemplate.
     */
    public static void setPoolSize(int size) {
        DEFAULT_POOL_SIZE = size;
    }

    /**
     * Pool size: {@link #DEFAULT_POOL_SIZE}. AutoClose: true.
     *
     * @param poolName thread name prefix
     */
    public ExecutorTemplate(String poolName) {
        this(defaultExecutor(poolName), true);
    }

    /**
     * AutoClose: true.
     *
     * @param poolName thread name prefix
     * @param poolSize number of threads
     */
    public ExecutorTemplate(String poolName, int poolSize) {
        this(defaultExecutor(poolName, poolSize), true);
    }

    /**
     * @param poolName  thread name prefix
     * @param poolSize  number of threads
     * @param autoClose whether to shut the executor down after {@link #waitForResult()}
     */
    public ExecutorTemplate(String poolName, int poolSize, boolean autoClose) {
        this(defaultExecutor(poolName, poolSize), autoClose);
    }

    /**
     * Pool size: {@link #DEFAULT_POOL_SIZE}.
     *
     * @param poolName  thread name prefix
     * @param autoClose whether to shut the executor down after {@link #waitForResult()}
     */
    public ExecutorTemplate(String poolName, boolean autoClose) {
        this(defaultExecutor(poolName), autoClose);
    }

    /**
     * AutoClose: true.
     *
     * @param executor executor to run the tasks on
     */
    public ExecutorTemplate(ExecutorService executor) {
        this(executor, true);
    }

    /**
     * @param executor  executor to run the tasks on
     * @param autoClose whether to shut the executor down after {@link #waitForResult()}
     */
    public ExecutorTemplate(ExecutorService executor, boolean autoClose) {
        this.autoClose = autoClose;
        this.internalExecutor = executor;
        this.completionService = new ExecutorCompletionService<>(executor);
        this.futures = Collections.synchronizedList(new ArrayList<>());
    }

    /**
     * @param poolName thread name prefix
     * @return ExecutorService with {@link #DEFAULT_POOL_SIZE} threads
     */
    public static ExecutorService defaultExecutor(String poolName) {
        return defaultExecutor(DEFAULT_QUEUE_CAPACITY, poolName, DEFAULT_POOL_SIZE);
    }

    /**
     * @param poolName thread name prefix
     * @param poolSize number of threads
     * @return ExecutorService
     */
    public static ExecutorService defaultExecutor(String poolName, int poolSize) {
        return defaultExecutor(DEFAULT_QUEUE_CAPACITY, poolName, poolSize);
    }

    /**
     * @param capacity capacity of the bounded work queue
     * @param poolName thread name prefix
     * @param poolSize number of threads (core and maximum)
     * @return ExecutorService
     */
    public static ExecutorService defaultExecutor(int capacity, String poolName, int poolSize) {
        return new ThreadPoolExecutor(poolSize, poolSize, 30, TimeUnit.SECONDS,
                new ArrayBlockingQueue<>(capacity), new NamedThreadFactory(poolName));
    }

    /**
     * Submit a callable task
     *
     * @param task task to run
     * @throws RuntimeException if the task has already failed by the time it is checked
     */
    public void submit(Callable<T> task) {
        Future<T> f = this.completionService.submit(task);
        futures.add(f);
        check(f);
    }

    /**
     * Submit a runnable task; its result is {@code null}.
     *
     * @param task task to run
     * @throws RuntimeException if the task has already failed by the time it is checked
     */
    public void submit(Runnable task) {
        Future<T> f = this.completionService.submit(task, null);
        futures.add(f);
        check(f);
    }

    /**
     * Wait all the task run finished, and get all the results in completion order.
     * On the first failure the remaining tasks are cancelled. The tracked futures
     * are always cleared, and the executor is shut down when autoClose is set.
     *
     * @return results of all submitted tasks
     * @throws RuntimeException wrapping the root cause of the first failure
     */
    public List<T> waitForResult() {
        try {
            int index = 0;
            Throwable ex = null;
            List<T> result = new ArrayList<T>();
            while (index < futures.size()) {
                try {
                    Future<T> f = this.completionService.take();
                    result.add(f.get());
                } catch (Throwable e) {
                    if (e instanceof InterruptedException) {
                        // Keep the interrupt status visible to the caller.
                        Thread.currentThread().interrupt();
                    }
                    ex = getRootCause(e);
                    break;
                }
                index++;
            }
            if (ex != null) {
                cancelAll();
                throw new RuntimeException(ex);
            } else {
                return result;
            }
        } finally {
            clearFutures();
            if (autoClose) {
                destroyExecutor();
            }
        }
    }

    /**
     * Cancels every tracked task that is neither done nor cancelled, without
     * interrupting tasks that are already running.
     */
    public void cancelAll() {
        // Iterating a synchronizedList requires holding its lock explicitly.
        synchronized (futures) {
            for (Future<T> f : futures) {
                if (!f.isDone() && !f.isCancelled()) {
                    f.cancel(false);
                }
            }
        }
    }

    /**
     * Forgets all tracked futures.
     */
    public void clearFutures() {
        this.futures.clear();
    }

    /**
     * Initiates an orderly shutdown of the executor; does not wait for running tasks.
     */
    public void destroyExecutor() {
        if (internalExecutor != null && !internalExecutor.isShutdown()) {
            this.internalExecutor.shutdown();
            try {
                // Zero timeout: only polls the termination state.
                this.internalExecutor.awaitTermination(0, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                // Preserve the interrupt status instead of silently dropping it.
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Fast check the future: if it is already done and failed, cancel everything and rethrow.
     *
     * @param f future to inspect
     */
    private void check(Future<T> f) {
        if (f != null && f.isDone()) {
            try {
                f.get();
            } catch (Throwable e) {
                cancelAll();
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * @param throwable failure to unwrap
     * @return the deepest cause in the chain, or {@code throwable} itself if it has no cause
     */
    private Throwable getRootCause(Throwable throwable) {
        final Throwable holder = throwable;
        final List<Throwable> list = new ArrayList<>();
        // The contains() check guards against cyclic cause chains.
        while (throwable != null && !list.contains(throwable)) {
            list.add(throwable);
            throwable = throwable.getCause();
        }
        return list.size() < 2 ? holder : list.get(list.size() - 1);
    }

    /**
     * An internal named thread factory
     */
    static class NamedThreadFactory implements ThreadFactory {

        /**
         * Whether created threads are daemon threads.
         */
        private final boolean daemon;

        /**
         * Thread name prefix.
         */
        private final String name;

        /**
         * Sequence appended to the prefix to form each thread name.
         */
        private final AtomicInteger seq = new AtomicInteger(0);

        /**
         * @param name thread name prefix; threads are non-daemon
         */
        public NamedThreadFactory(String name) {
            this(name, false);
        }

        /**
         * @param name   thread name prefix
         * @param daemon whether created threads are daemon threads
         */
        public NamedThreadFactory(String name, boolean daemon) {
            this.name = name;
            this.daemon = daemon;
        }

        @Override
        public Thread newThread(Runnable r) {
            Thread t = new Thread(r);
            t.setDaemon(daemon);
            t.setPriority(Thread.NORM_PRIORITY);
            t.setName((name + seq.incrementAndGet()));
            return t;
        }
    }
}
commons-lang + + + com.alipay.sofa.common + sofa-common-tools + + + + io.netty + netty-codec-dns + + + io.netty + netty-codec-http + + + io.netty + netty-codec-http2 + + + io.netty + netty-codec-haproxy + + + io.netty + netty-codec-mqtt + + + io.netty + netty-codec-memcache + + + io.netty + netty-codec-redis + + + io.netty + netty-codec-smtp + + + io.netty + netty-codec-socks + + + io.netty + netty-codec-stomp + + + io.netty + netty-codec-xml + + + + io.netty + netty-handler-proxy + + + io.netty + netty-handler-ssl-ocsp + + + + io.netty + netty-resolver-dns + + + io.netty + netty-resolver-dns-classes-macos + + + io.netty + netty-resolver-dns-native-macos + + + + io.netty + netty-transport-rxtx + + + io.netty + netty-transport-udt + + + io.netty + netty-transport-sctp + + + + + com.alipay.sofa.common + sofa-common-tools + 1.3.11 + + + + org.slf4j + slf4j-api + + + + com.google.guava + guava + + + + + com.alibaba + fastjson + 1.2.83 + + + commons-lang + commons-lang + 2.6 + + + mysql + mysql-connector-java + ${mysql.driver.version} + diff --git a/package.xml b/package.xml index e51c11e1d5..624109f799 100644 --- a/package.xml +++ b/package.xml @@ -39,6 +39,13 @@ datax + + obhbasereader/target/datax/ + + **/*.* + + datax + drdsreader/target/datax/ @@ -476,6 +483,13 @@ datax + + obhbasewriter/target/datax/ + + **/*.* + + datax + gdbwriter/target/datax/ diff --git a/plugin-rdbms-util/pom.xml b/plugin-rdbms-util/pom.xml index c49f64af16..6dc69e06c6 100755 --- a/plugin-rdbms-util/pom.xml +++ b/plugin-rdbms-util/pom.xml @@ -33,6 +33,17 @@ ${mysql.driver.version} test + + com.oceanbase + oceanbase-client + 2.4.11 + + + com.google.guava + guava + + + org.slf4j slf4j-api diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ObVersion.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ObVersion.java index 0eb34feb0d..da078df924 100644 --- 
a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ObVersion.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ObVersion.java @@ -16,6 +16,8 @@ public class ObVersion implements Comparable { private int patchNumber; public static final ObVersion V2276 = valueOf("2.2.76"); + public static final ObVersion V2252 = valueOf("2.2.52"); + public static final ObVersion V3 = valueOf("3.0.0.0"); public static final ObVersion V4000 = valueOf("4.0.0.0"); private static final ObVersion DEFAULT_VERSION = diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/SplitedSlice.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/SplitedSlice.java new file mode 100644 index 0000000000..d8de129490 --- /dev/null +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/SplitedSlice.java @@ -0,0 +1,37 @@ +package com.alibaba.datax.plugin.rdbms.util; + +public class SplitedSlice { + private String begin; + private String end; + private String range; + + public SplitedSlice(String begin, String end, String range) { + this.begin = begin; + this.end = end; + this.range = range; + } + + public String getBegin() { + return begin; + } + + public void setBegin(String begin) { + this.begin = begin; + } + + public String getEnd() { + return end; + } + + public void setEnd(String end) { + this.end = end; + } + + public String getRange() { + return range; + } + + public void setRange(String range) { + this.range = range; + } +} diff --git a/pom.xml b/pom.xml index eeb4bfaffe..c7f43f1725 100644 --- a/pom.xml +++ b/pom.xml @@ -55,6 +55,7 @@ oraclereader cassandrareader oceanbasev10reader + obhbasereader rdbmsreader odpsreader @@ -93,6 +94,7 @@ kingbaseeswriter adswriter oceanbasev10writer + obhbasewriter adbpgwriter hologresjdbcwriter rdbmswriter diff --git a/selectdbwriter/doc/stream2selectdb.json b/selectdbwriter/doc/stream2selectdb.json index d5e14c4884..0c5be6dc0a 100644 --- 
a/selectdbwriter/doc/stream2selectdb.json +++ b/selectdbwriter/doc/stream2selectdb.json @@ -1,92 +1,61 @@ { - "core":{ - "transport":{ - "channel":{ - "speed":{ - "byte":10485760 + "core": { + "transport": { + "channel": { + "speed": { + "byte": 10485760 } } } }, - "job":{ - "content":[ + "job": { + "content": [ { - "reader":{ - "name":"streamreader", - "parameter":{ - "column":[ - { - "type":"string", - "value":"DataX" - }, - { - "type":"int", - "value":19890604 - }, - { - "type":"date", - "value":"1989-06-04 00:00:00" - }, - { - "type":"bool", - "value":true - }, - { - "type":"string", - "value":"test" - } - ], - "sliceRecordCount":1000000 - } - }, - "writer":{ - "name":"selectdbwriter", - "parameter":{ - "loadUrl":[ + "reader": {}, + "writer": { + "name": "selectdbwriter", + "parameter": { + "loadUrl": [ "xxx:35871" ], - "loadProps":{ - "file.type":"json", - "file.strip_outer_array":"true" + "loadProps": { + "file.type": "json", + "file.strip_outer_array": "true" }, - "database":"db1", - "column":[ + "database": "db1", + "column": [ "k1", "k2", "k3", "k4", "k5" ], - "username":"admin", - "password":"SelectDB2022", - "postSql":[ - - ], - "preSql":[ - - ], - "connection":[ + "username": "admin", + "password": "SelectDB2022", + "postSql": [], + "preSql": [], + "connection": [ { - "jdbcUrl":"jdbc:mysql://xxx:32386/cl_test", - "table":[ + "jdbcUrl": "jdbc:mysql://xxx:32386/cl_test", + "table": [ "test_selectdb" ], - "selectedDatabase":"cl_test" + "selectedDatabase": "cl_test" } ], - "maxBatchRows":200000, - "batchSize":53687091200 + "maxBatchRows": 200000, + "batchSize": 53687091200 } } } ], - "setting":{ - "errorLimit":{ - "percentage":0.02, - "record":0 + "setting": { + "errorLimit": { + "percentage": 0.02, + "record": 0 }, - "speed":{ - "byte":10485760 + "speed": { + "byte": 10485760 } } } diff --git a/transformer/doc/transformer.md b/transformer/doc/transformer.md index 0a00dbaa9c..a9da83a4ed 100644 --- a/transformer/doc/transformer.md +++ 
b/transformer/doc/transformer.md @@ -47,7 +47,7 @@ dx_replace(1,"5","10","****") column 1的value为“dataxTest”=>"datax****" 4. dx_filter (关联filter暂不支持,即多个字段的联合判断,函参太过复杂,用户难以使用。) * 参数: * 第一个参数:字段编号,对应record中第几个字段。 - * 第二个参数:运算符,支持一下运算符:like, not like, >, =, <, >=, !=, <= + * 第二个参数:运算符,支持以下运算符:like, not like, >, =, <, >=, !=, <= * 第三个参数:正则表达式(java正则表达式)、值。 * 返回: * 如果匹配正则表达式,返回Null,表示过滤该行。不匹配表达式时,表示保留该行。(注意是该行)。对于>=<都是对字段直接compare的结果. @@ -145,11 +145,11 @@ String code3 = "Column column = record.getColumn(1);\n" + "type": "string" }, { - "value": 19890604, + "value": 1724154616370, "type": "long" }, { - "value": "1989-06-04 00:00:00", + "value": "2024-01-01 00:00:00", "type": "date" }, { @@ -157,11 +157,11 @@ String code3 = "Column column = record.getColumn(1);\n" + "type": "bool" }, { - "value": "test", + "value": "TestRawData", "type": "bytes" } ], - "sliceRecordCount": 100000 + "sliceRecordCount": 100 } }, "writer": { @@ -174,38 +174,44 @@ String code3 = "Column column = record.getColumn(1);\n" + "transformer": [ { "name": "dx_substr", - "parameter": - { - "columnIndex":5, - "paras":["1","3"] - } + "parameter": { + "columnIndex": 5, + "paras": [ + "1", + "3" + ] + } }, { "name": "dx_replace", - "parameter": - { - "columnIndex":4, - "paras":["3","4","****"] - } + "parameter": { + "columnIndex": 4, + "paras": [ + "3", + "4", + "****" + ] + } }, { "name": "dx_digest", - "parameter": - { - "columnIndex":3, - "paras":["md5", "toLowerCase"] - } + "parameter": { + "columnIndex": 3, + "paras": [ + "md5", + "toLowerCase" + ] + } }, { "name": "dx_groovy", - "parameter": - { - "code": "//groovy code//", - "extraPackage":[ - "import somePackage1;", - "import somePackage2;" - ] - } + "parameter": { + "code": "//groovy code//", + "extraPackage": [ + "import somePackage1;", + "import somePackage2;" + ] + } } ] }