diff --git a/README.md b/README.md index 89aeb87c04..0125bee8cd 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N |--------------|---------------------------|:---------:|:---------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| | RDBMS 关系型数据库 | MySQL | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mysqlwriter/doc/mysqlwriter.md) | | | Oracle | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/oraclereader/doc/oraclereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/oraclewriter/doc/oraclewriter.md) | -| | OceanBase | √ | √ | [读](https://open.oceanbase.com/docs/community/oceanbase-database/V3.1.0/use-datax-to-full-migration-data-to-oceanbase) 、[写](https://open.oceanbase.com/docs/community/oceanbase-database/V3.1.0/use-datax-to-full-migration-data-to-oceanbase) | +| | OceanBase | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/oceanbasev10reader/doc/oceanbasev10reader.md) 、[写](https://github.com/alibaba/DataX/blob/master/oceanbasev10writer/doc/oceanbasev10writer.md) | | | SQLServer | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/sqlserverreader/doc/sqlserverreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/sqlserverwriter/doc/sqlserverwriter.md) | | | PostgreSQL | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/postgresqlreader/doc/postgresqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/postgresqlwriter/doc/postgresqlwriter.md) | | | DRDS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/drdsreader/doc/drdsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/drdswriter/doc/drdswriter.md) | @@ -108,7 +108,7 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N # 重要版本更新说明 
-DataX 后续计划月度迭代更新,也欢迎感兴趣的同学提交 Pull requests,月度更新内容会介绍介绍如下。 +DataX 后续计划月度迭代更新,也欢迎感兴趣的同学提交 Pull requests,月度更新内容如下。 - [datax_v202309](https://github.com/alibaba/DataX/releases/tag/datax_v202309) - 支持Phoenix 同步数据添加 where条件 diff --git a/adbpgwriter/src/main/doc/adbpgwriter.md b/adbpgwriter/src/main/doc/adbpgwriter.md index 6d3857bc8c..c03447d416 100644 --- a/adbpgwriter/src/main/doc/adbpgwriter.md +++ b/adbpgwriter/src/main/doc/adbpgwriter.md @@ -149,6 +149,7 @@ COPY命令将数据写入ADB PG数据库中。 注意:1、我们强烈不推荐你这样配置,因为当你目的表字段个数、类型等有改动时,你的任务可能运行不正确或者失败 2、此处 column 不能配置任何常量值 + 3、大写字段名,此处配置时,不需要拼接转义符号:\" * 必选:是 @@ -229,4 +230,4 @@ create table schematest.test_datax ( #### 4.2.2 性能测试小结 1. `channel数对性能影响很大` -2. `通常不建议写入数据库时,通道个数 > 32` \ No newline at end of file +2. `通常不建议写入数据库时,通道个数 > 32` diff --git a/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/copy/Adb4pgClientProxy.java b/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/copy/Adb4pgClientProxy.java index 4998607683..d09f4e2b8a 100644 --- a/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/copy/Adb4pgClientProxy.java +++ b/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/copy/Adb4pgClientProxy.java @@ -56,7 +56,7 @@ public Adb4pgClientProxy(Configuration configuration,TaskPluginCollector taskPl int retryIntervalTime = configuration.getInt(Key.RETRY_INTERVAL_TIME, 1000); databaseConfig.setRetryIntervalTime(retryIntervalTime); - // 设置自动提交的SQL长度(单位Byte),默认为32KB,一般不建议设置 + // 设置自动提交的SQL长度(单位Byte),默认为10MB,一般不建议设置 int commitSize = configuration.getInt("commitSize", 10 * 1024 * 1024); databaseConfig.setCommitSize(commitSize); diff --git a/common/src/main/java/com/alibaba/datax/common/util/Configuration.java b/common/src/main/java/com/alibaba/datax/common/util/Configuration.java index c1194532a7..ef29320dd2 100755 --- a/common/src/main/java/com/alibaba/datax/common/util/Configuration.java +++ 
b/common/src/main/java/com/alibaba/datax/common/util/Configuration.java @@ -1047,7 +1047,7 @@ private void checkPath(final String path) { "系统编程错误, 该异常代表系统编程错误, 请联系DataX开发团队!."); } - for (final String each : StringUtils.split(".")) { + for (final String each : StringUtils.split(path, ".")) { if (StringUtils.isBlank(each)) { throw new IllegalArgumentException(String.format( "系统编程错误, 路径[%s]不合法, 路径层次之间不能出现空白字符 .", path)); diff --git a/core/src/main/job/job.json b/core/src/main/job/job.json index cc35387778..ad5d4a85c5 100755 --- a/core/src/main/job/job.json +++ b/core/src/main/job/job.json @@ -2,11 +2,10 @@ "job": { "setting": { "speed": { - "channel":1 + "channel": 2 }, "errorLimit": { - "record": 0, - "percentage": 0.02 + "record": 0 } }, "content": [ @@ -14,17 +13,17 @@ "reader": { "name": "streamreader", "parameter": { - "column" : [ + "column": [ { "value": "DataX", "type": "string" }, { - "value": 19890604, + "value": 1724154616370, "type": "long" }, { - "value": "1989-06-04 00:00:00", + "value": "2024-01-01 00:00:00", "type": "date" }, { @@ -32,11 +31,11 @@ "type": "bool" }, { - "value": "test", + "value": "TestRawData", "type": "bytes" } ], - "sliceRecordCount": 100000 + "sliceRecordCount": 100 } }, "writer": { @@ -49,4 +48,4 @@ } ] } -} +} \ No newline at end of file diff --git a/doriswriter/doc/doriswriter.md b/doriswriter/doc/doriswriter.md index 58a688b8dc..2070113b4d 100644 --- a/doriswriter/doc/doriswriter.md +++ b/doriswriter/doc/doriswriter.md @@ -36,8 +36,6 @@ DorisWriter 通过Doris原生支持Stream load方式导入数据, DorisWriter "name": "doriswriter", "parameter": { "loadUrl": ["172.16.0.13:8030"], - "loadProps": { - }, "column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"], "username": "root", "password": "xxxxxx", @@ -178,4 +176,4 @@ DorisWriter 通过Doris原生支持Stream load方式导入数据, DorisWriter } ``` -更多信息请参照 Doris 官网:[Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual) \ No newline at 
end of file +更多信息请参照 Doris 官网:[Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual) diff --git a/elasticsearchwriter/doc/elasticsearchwriter.md b/elasticsearchwriter/doc/elasticsearchwriter.md index 9a22f13c22..3a3315edc3 100644 --- a/elasticsearchwriter/doc/elasticsearchwriter.md +++ b/elasticsearchwriter/doc/elasticsearchwriter.md @@ -167,79 +167,4 @@ * dynamic * 描述: 不使用datax的mappings,使用es自己的自动mappings * 必选: 否 - * 默认值: false - - - -## 4 性能报告 - -### 4.1 环境准备 - -* 总数据量 1kw条数据, 每条0.1kb -* 1个shard, 0个replica -* 不加id,这样默认是append_only模式,不检查版本,插入速度会有20%左右的提升 - -#### 4.1.1 输入数据类型(streamreader) - -``` -{"value": "1.1.1.1", "type": "string"}, -{"value": 19890604.0, "type": "double"}, -{"value": 19890604, "type": "long"}, -{"value": 19890604, "type": "long"}, -{"value": "hello world", "type": "string"}, -{"value": "hello world", "type": "string"}, -{"value": "41.12,-71.34", "type": "string"}, -{"value": "2017-05-25", "type": "string"}, -``` - -#### 4.1.2 输出数据类型(eswriter) - -``` -{ "name": "col_ip","type": "ip" }, -{ "name": "col_double","type": "double" }, -{ "name": "col_long","type": "long" }, -{ "name": "col_integer","type": "integer" }, -{ "name": "col_keyword", "type": "keyword" }, -{ "name": "col_text", "type": "text"}, -{ "name": "col_geo_point", "type": "geo_point" }, -{ "name": "col_date", "type": "date"} -``` - -#### 4.1.2 机器参数 - -1. cpu: 32 Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz -2. mem: 128G -3. 
net: 千兆双网卡 - -#### 4.1.3 DataX jvm 参数 - --Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError - -### 4.2 测试报告 - -| 通道数| 批量提交行数| DataX速度(Rec/s)|DataX流量(MB/s)| -|--------|--------| --------|--------| -| 4| 256| 11013| 0.828| -| 4| 1024| 19417| 1.43| -| 4| 4096| 23923| 1.76| -| 4| 8172| 24449| 1.80| -| 8| 256| 21459| 1.58| -| 8| 1024| 37037| 2.72| -| 8| 4096| 45454| 3.34| -| 8| 8172| 45871| 3.37| -| 16| 1024| 67567| 4.96| -| 16| 4096| 78125| 5.74| -| 16| 8172| 77519| 5.69| -| 32| 1024| 94339| 6.93| -| 32| 4096| 96153| 7.06| -| 64| 1024| 91743| 6.74| - -### 4.3 测试总结 - -* 最好的结果是32通道,每次传4096,如果单条数据很大, 请适当减少批量数,防止oom -* 当然这个很容易水平扩展,而且es也是分布式的,多设置几个shard也可以水平扩展 - -## 5 约束限制 - -* 如果导入id,这样数据导入失败也会重试,重新导入也仅仅是覆盖,保证数据一致性 -* 如果不导入id,就是append_only模式,elasticsearch自动生成id,速度会提升20%左右,但数据无法修复,适合日志型数据(对数据精度要求不高的) \ No newline at end of file + * 默认值: false \ No newline at end of file diff --git a/gaussdbwriter/src/main/java/com/alibaba/datax/plugin/reader/gaussdbwriter/GaussDbWriter.java b/gaussdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gaussdbwriter/GaussDbWriter.java similarity index 98% rename from gaussdbwriter/src/main/java/com/alibaba/datax/plugin/reader/gaussdbwriter/GaussDbWriter.java rename to gaussdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gaussdbwriter/GaussDbWriter.java index 3f758ee708..751defca89 100644 --- a/gaussdbwriter/src/main/java/com/alibaba/datax/plugin/reader/gaussdbwriter/GaussDbWriter.java +++ b/gaussdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gaussdbwriter/GaussDbWriter.java @@ -1,4 +1,4 @@ -package com.alibaba.datax.plugin.reader.gaussdbwriter; +package com.alibaba.datax.plugin.writer.gaussdbwriter; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordReceiver; diff --git a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderTask.java 
b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderTask.java index 866cef38bc..3ebc704cd5 100644 --- a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderTask.java +++ b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderTask.java @@ -86,16 +86,18 @@ private Column convertPhoenixValueToDataxColumn(int sqlType, Object value) { column = new LongColumn((Integer) value); break; case Types.TINYINT: - column = new LongColumn(((Byte) value).longValue()); + Byte aByte = (Byte) value; + column = new LongColumn(null == aByte ? null : aByte.longValue()); break; case Types.SMALLINT: - column = new LongColumn(((Short) value).longValue()); + Short aShort = (Short) value; + column = new LongColumn(null == aShort ? null : aShort.longValue()); break; case Types.BIGINT: column = new LongColumn((Long) value); break; case Types.FLOAT: - column = new DoubleColumn((Float.valueOf(value.toString()))); + column = new DoubleColumn(null == value ? 
null : (Float.valueOf(value.toString()))); break; case Types.DECIMAL: column = new DoubleColumn((BigDecimal)value); diff --git a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java index 09fd272389..e2900f7c32 100644 --- a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java +++ b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java @@ -31,6 +31,7 @@ import parquet.schema.*; import java.io.IOException; +import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.*; @@ -440,7 +441,7 @@ public List getColumnTypeInspectors(List column objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(Double.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); break; case TIMESTAMP: - objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(java.sql.Timestamp.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(org.apache.hadoop.hive.common.type.Timestamp.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); break; case DATE: objectInspector = ObjectInspectorFactory.getReflectionObjectInspector(java.sql.Date.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); @@ -533,7 +534,13 @@ public static MutablePair, Boolean> transportOneRecord( recordList.add(new java.sql.Date(column.asDate().getTime())); break; case TIMESTAMP: - recordList.add(new java.sql.Timestamp(column.asDate().getTime())); + Date date = column.asDate(); + if (date == null) { + recordList.add(null); + } else { + Timestamp ts = new Timestamp(date.getTime()); + recordList.add(org.apache.hadoop.hive.common.type.Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos())); + } break; default: throw DataXException @@ -630,7 +637,14 @@ public void parquetFileStartWrite(RecordReceiver lineReceiver, Configuration 
con MessageType messageType = null; ParquetFileProccessor proccessor = null; Path outputPath = new Path(fileName); - String schema = config.getString(Key.PARQUET_SCHEMA); + String schema = config.getString(Key.PARQUET_SCHEMA, null); + if (schema == null) { + List columns = config.getListConfiguration(Key.COLUMN); + if (columns == null || columns.isEmpty()) { + throw DataXException.asDataXException("parquetSchema or column can't be blank!"); + } + schema = HdfsHelper.generateParquetSchemaFromColumnAndType(columns); + } try { messageType = MessageTypeParser.parseMessageType(schema); } catch (Exception e) { diff --git a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java index e77074619f..7535687cec 100644 --- a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java +++ b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java @@ -228,6 +228,12 @@ public List split(int mandatoryNumber) { String endFullFileName = null; fileSuffix = UUID.randomUUID().toString().replace('-', '_'); + if (fileType.equalsIgnoreCase("PARQUET")) { + if (StringUtils.isNotBlank(this.compress)) { + fileSuffix += "." 
+ this.compress.toLowerCase(); + } + fileSuffix += ".parquet"; + } fullFileName = String.format("%s%s%s__%s", defaultFS, storePath, filePrefix, fileSuffix); endFullFileName = String.format("%s%s%s__%s", defaultFS, endStorePath, filePrefix, fileSuffix); diff --git a/milvuswriter/doc/milvuswriter.md b/milvuswriter/doc/milvuswriter.md new file mode 100644 index 0000000000..3788c62ac6 --- /dev/null +++ b/milvuswriter/doc/milvuswriter.md @@ -0,0 +1,273 @@ +# DataX milvuswriter + + +--- + + +## 1 快速介绍 + +milvuswriter 插件实现了写入数据到 milvus集合的功能; 面向ETL开发工程师,使用 milvuswriter 从数仓导入数据到 milvus, 同时 milvuswriter 亦可以作为数据迁移工具为DBA等用户提供服务。 + + +## 2 实现原理 + +milvuswriter 通过 DataX 框架获取 Reader 生成的协议数据,通过 `upsert/insert `方式写入数据到milvus, 并通过batchSize累积的方式进行数据提交。 + + + 注意:upsert写入方式(推荐): 在非autid表场景下根据主键更新 Collection 中的某个 Entity;autid表场景下会将 Entity 中的主键替换为自动生成的主键并插入数据。 + insert写入方式: 多用于autid表插入数据milvus自动生成主键, 非autoid表下使用insert会导致数据重复。 + + +## 3 功能说明 + +### 3.1 配置样例 + +* 这里提供一份从内存产生数据导入到 milvus的配置样例。 + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column" : [ + { + "value": 1, + "type": "long" + }, + { + "value": "[1.1,1.2,1.3]", + "type": "string" + }, + { + "value": 100, + "type": "long" + }, + { + "value": 200, + "type": "long" + }, + { + "value": 300, + "type": "long" + }, + { + "value": 3.14159, + "type": "double" + }, + { + "value": 3.1415926, + "type": "double" + }, + { + "value": "testvarcharvalue", + "type": "string" + }, + { + "value": true, + "type": "bool" + }, + { + "value": "[1.123,1.2456,1.3789]", + "type": "string" + }, + { + "value": "[2.123,2.2456,2.3789]", + "type": "string" + }, + { + "value": "12345678", + "type": "string" + }, + { + "value": "{\"a\":1,\"b\":2,\"c\":3}", + "type": "string" + }, + { + "value": "[1,2,3,4]", + "type": "string" + } + ], + "sliceRecordCount": 1 + } + }, + "writer": { + "parameter": { + "schemaCreateMode": "createIfNotExist", + "connectTimeoutMs": 60000, + "writeMode": "upsert", + "collection": 
"demo01", + "type": "milvus", + "token": "xxxxxxx", + "endpoint": "https://xxxxxxxx.com:443", + "batchSize": 1024, + "column": [ + { + "name": "id", + "type": "Int64", + "primaryKey": "true" + }, + { + "name": "floatvector", + "type": "FloatVector", + "dimension": "3" + }, + { + "name": "int8col", + "type": "Int8" + }, + { + "name": "int16col", + "type": "Int16" + }, + { + "name": "int32col", + "type": "Int32" + }, + { + "name": "floatcol", + "type": "Float" + }, + { + "name": "doublecol", + "type": "Double" + }, + { + "name": "varcharcol", + "type": "VarChar" + }, + { + "name": "boolcol", + "type": "Bool" + }, + { + "name": "bfloat16vectorcol", + "type": "BFloat16Vector", + "dimension": "3" + }, + { + "name": "float16vectorcol", + "type": "Float16Vector", + "dimension": "3" + }, + { + "name": "binaryvectorcol", + "type": "BinaryVector", + "dimension": "64" + }, + { + "name": "jsoncol", + "type": "JSON" + }, + { + "name": "intarraycol", + "maxCapacity": "8", + "type": "Array", + "elementType": "Int32" + } + ] + }, + "name": "milvuswriter" + } + } + ], + "setting": { + "errorLimit": { + "record": "0" + }, + "speed": { + "concurrent": 2, + "channel": 2 + } + } + } +} + +``` + + +### 3.2 参数说明 + +* **endpoint** + * 描述:milvus数据库的连接信息,包含地址和端口,例如https://xxxxxxxx.com:443 + + 注意:1、在一个数据库上只能配置一个 endpoint 值 + 2、一个milvus 写入任务仅能配置一个 endpoint + * 必选:是 + * 默认值:无 +* *schemaCreateMode* + * 描述: 集合创建的模式,同步时milvus集合不存在的处理方式, 根据配置的column属性进行创建 + * 取值 + * createIfNotExist: 如果集合不存在,则创建集合,如果集合存在,则不执行任何操作 + * ignore: 如果集合不存在,任务异常报错,如果集合存在,则不执行任何操作 + * recreate: 如果集合不存在,则创建集合,如果集合存在,则删除集合重建集合 + * 必选:否 + * 默认值:createIfNotExist +* **connectTimeoutMs** + * 描述:与milvus交互是客户端的连接超时时间,单位毫秒 + * 必选:否 + * 默认值:10000 +* **token** + * 描述:milvus实例认证的token秘钥,与username认证方式二选一配置 + * 必选:否 + * 默认值:无 +* **username** + * 描述:目的milvus数据库的用户名, 与token二选一配置 + * 必选:否 + * 默认值:无 +* **password** + * 描述:目的milvus数据库的密码 + * 必选:否 + * 默认值:无 +* *writeMode* + * 描述: 写入milvus集合的写入方式 + * 取值 + * upsert(推荐): 在非autid表场景下根据主键更新 
Collection 中的某个 Entity;autid表场景下会将 Entity 中的主键替换为自动生成的主键并插入数据。 + * insert: 多用于autid表插入数据milvus自动生成主键, 非autoid表下使用insert会导致数据重复。 + * 必选:是 + * 默认值:upsert +* **collection** + * 描述:目的集合名称。 只能配置一个milvus的集合名称。 + * 必选:是 + * 默认值:无 +* **batchSize** + * 描述:一次性批量提交的记录数大小,该值可以极大减少DataX与milvus的网络交互次数,并提升整体吞吐量。但是该值设置过大可能会造成DataX运行进程OOM情况。 + * 必选:否 + * 默认值:1024 + +* **column** + * 描述:目的集合需要写入数据的字段,字段内容用json格式描述,字段之间用英文逗号分隔。字段属性必填name、type, 其他属性在需要schemaCreateMode创建集合按需填入,例如: + + "column": [ + { + "name": "id", + "type": "Int64", + "primaryKey": "true" + }, + { + "name": "floatvector", + "type": "FloatVector", + "dimension": "3" + }] + * 必选:是 + * 默认值:否 +### 3.3 支持同步milvus字段类型 + Bool, + Int8, + Int16, + Int32, + Int64, + Float, + Double, + String, + VarChar, + Array, + JSON, + BinaryVector, + FloatVector, + Float16Vector, + BFloat16Vector, + SparseFloatVector + diff --git a/milvuswriter/pom.xml b/milvuswriter/pom.xml new file mode 100644 index 0000000000..16c00560de --- /dev/null +++ b/milvuswriter/pom.xml @@ -0,0 +1,125 @@ + + + 4.0.0 + + com.alibaba.datax + datax-all + 0.0.1-SNAPSHOT + + + milvuswriter + + + UTF-8 + official + 1.8 + + + + com.alibaba.fastjson2 + fastjson2 + 2.0.49 + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + org.projectlombok + lombok + 1.18.30 + + + guava + com.google.guava + 32.0.1-jre + + + io.milvus + milvus-sdk-java + 2.5.2 + + + org.mockito + mockito-core + 3.3.3 + test + + + junit + junit + 4.11 + test + + + org.jetbrains.kotlin + kotlin-stdlib + 2.0.0 + + + org.powermock + powermock-module-junit4 + 2.0.9 + test + + + org.powermock + powermock-api-mockito2 + 2.0.9 + test + + + + + + + + src/main/resources + + **/*.* + + true + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + + \ No newline at end of file diff --git 
a/milvuswriter/src/main/assembly/package.xml b/milvuswriter/src/main/assembly/package.xml new file mode 100644 index 0000000000..62357b4ae5 --- /dev/null +++ b/milvuswriter/src/main/assembly/package.xml @@ -0,0 +1,36 @@ + + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/milvuswriter + + + target/ + + milvuswriter-0.0.1-SNAPSHOT.jar + + plugin/writer/milvuswriter + + + + + + false + plugin/writer/milvuswriter/libs + runtime + + + diff --git a/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/KeyConstant.java b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/KeyConstant.java new file mode 100644 index 0000000000..28f1ff13e5 --- /dev/null +++ b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/KeyConstant.java @@ -0,0 +1,17 @@ +package com.alibaba.datax.plugin.writer.milvuswriter; + +public class KeyConstant { + public static final String USERNAME = "username"; + public static final String PASSWORD = "password"; + public static final String ENDPOINT = "endpoint"; + public static final String TOKEN = "token"; + public static final String DATABASE = "database"; + public static final String COLLECTION = "collection"; + public static final String BATCH_SIZE = "batchSize"; + public static final String COLUMN = "column"; + public static final String SCHAME_CREATE_MODE = "schemaCreateMode"; + public static final String WRITE_MODE = "writeMode"; + public static final String PARTITION = "partition"; + public static final String CONNECT_TIMEOUT_MS = "connectTimeoutMs"; + public static final String ENABLE_DYNAMIC_SCHEMA = "enableDynamicSchema"; +} \ No newline at end of file diff --git a/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusBufferWriter.java b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusBufferWriter.java new file mode 100644 index 0000000000..b78728e44e --- /dev/null +++ 
b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusBufferWriter.java @@ -0,0 +1,166 @@ +package com.alibaba.datax.plugin.writer.milvuswriter; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.plugin.writer.milvuswriter.enums.WriteModeEnum; +import com.alibaba.fastjson2.JSONArray; +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import io.milvus.v2.common.DataType; +import io.milvus.v2.service.vector.request.data.BFloat16Vec; +import io.milvus.v2.service.vector.request.data.Float16Vec; +import lombok.extern.slf4j.Slf4j; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.TreeMap; +import java.util.stream.Collectors; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; + +@Slf4j +public class MilvusBufferWriter { + + private final MilvusClient milvusClient; + private final String collection; + private final Integer batchSize; + private List dataCache; + private List milvusColumnMeta; + private WriteModeEnum writeMode; + private String partition; + + public MilvusBufferWriter(MilvusClient milvusClient, Configuration writerSliceConfig) { + this.milvusClient = milvusClient; + this.collection = writerSliceConfig.getString(KeyConstant.COLLECTION); + this.batchSize = writerSliceConfig.getInt(KeyConstant.BATCH_SIZE, 100); + this.dataCache = new ArrayList<>(batchSize); + this.milvusColumnMeta = JSON.parseObject(writerSliceConfig.getString(KeyConstant.COLUMN), new TypeReference>() { + }); + this.writeMode = WriteModeEnum.getEnum(writerSliceConfig.getString(KeyConstant.WRITE_MODE)); + this.partition = writerSliceConfig.getString(KeyConstant.PARTITION); + } + + public void add(Record record, TaskPluginCollector taskPluginCollector) { + try { + JsonObject data = 
this.convertByType(milvusColumnMeta, record); + dataCache.add(data); + } catch (Exception e) { + taskPluginCollector.collectDirtyRecord(record, String.format("parse record error errorMessage: %s", e.getMessage())); + } + } + + public Boolean needCommit() { + return dataCache.size() >= batchSize; + } + + public void commit() { + if (dataCache.isEmpty()) { + log.info("dataCache is empty, skip commit"); + return; + } + if (writeMode == WriteModeEnum.INSERT) { + milvusClient.insert(collection, partition, dataCache); + } else { + milvusClient.upsert(collection, partition, dataCache); + } + dataCache = new ArrayList<>(batchSize); + } + + public int getDataCacheSize() { + return dataCache.size(); + } + + private JsonObject convertByType(List milvusColumnMeta, Record record) { + JsonObject data = new JsonObject(); + Gson gson = new Gson(); + for (int i = 0; i < record.getColumnNumber(); i++) { + MilvusColumn milvusColumn = milvusColumnMeta.get(i); + DataType fieldType = milvusColumn.getMilvusTypeEnum(); + String fieldName = milvusColumn.getName(); + Column column = record.getColumn(i); + try { + Object field = convertToMilvusField(fieldType, column, milvusColumn); + data.add(fieldName, gson.toJsonTree(field)); + } catch (Exception e) { + log.error("parse error for column: {} errorMessage: {}", fieldName, e.getMessage(), e); + throw e; + } + } + return data; + } + + //值需要跟这里匹配:io.milvus.param.ParamUtils#checkFieldData(io.milvus.param.collection.FieldType, java.util.List>, boolean) + private Object convertToMilvusField(DataType type, Column column, MilvusColumn milvusColumn) { + if (column.getRawData() == null) { + return null; + } + switch (type) { + case Int8: + case Int16: + case Int32: + case Int64: + return column.asLong(); + case Float: + case Double: + return column.asDouble(); + case String: + case VarChar: + return column.asString(); + case Bool: + return column.asBoolean(); + case BFloat16Vector: + JSONArray bFloat16ArrayJson = JSON.parseArray(column.asString()); + 
List bfloat16Vector = new ArrayList<>(); + for (int i = 0; i < bFloat16ArrayJson.size(); i++) { + Float value = Float.parseFloat(bFloat16ArrayJson.getString(i)); + bfloat16Vector.add(value); + } + BFloat16Vec bFloat16Vec = new BFloat16Vec(bfloat16Vector); + ByteBuffer byteBuffer = (ByteBuffer) bFloat16Vec.getData(); + return byteBuffer.array(); + case Float16Vector: + JSONArray float16ArrayJson = JSON.parseArray(column.asString()); + List float16Vector = new ArrayList<>(); + for (int i = 0; i < float16ArrayJson.size(); i++) { + Float floatValue = Float.parseFloat(float16ArrayJson.getString(i)); + float16Vector.add(floatValue); + } + Float16Vec float16Vec = new Float16Vec(float16Vector); + ByteBuffer data = (ByteBuffer) float16Vec.getData(); + return data.array(); + case BinaryVector: + return column.asBytes(); + case FloatVector: + JSONArray arrayJson = JSON.parseArray(column.asString()); + return arrayJson.stream().map(item -> Float.parseFloat(String.valueOf(item))).collect(Collectors.toList()); + case SparseFloatVector: + //[3:0.5, 24:0.8, 76:0.2] + try { + JSONArray sparseFloatArray = JSON.parseArray(column.asString()); + TreeMap mapValue = new TreeMap<>(); + for (int i = 0; i < sparseFloatArray.size(); i++) { + String value = sparseFloatArray.getString(i); + String[] split = value.split(":"); + Long key = Long.parseLong(split[0]); + Float val = Float.parseFloat(split[1]); + mapValue.put(key, val); + } + return mapValue; + } catch (Exception e) { + log.error("parse column[{}] SparseFloatVector value error, value should like [3:0.5, 24:0.8, 76:0.2], but get:{}", milvusColumn.getName(), column.asString()); + throw e; + } + case JSON: + return column.asString(); + case Array: + JSONArray parseArray = JSON.parseArray(column.asString()); + return parseArray.stream().map(item -> String.valueOf(item)).collect(Collectors.toList()); + default: + throw new RuntimeException(String.format("Unsupported data type[%s]", type)); + } + } +} \ No newline at end of file diff --git 
a/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusClient.java b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusClient.java new file mode 100644 index 0000000000..1bf4743b01 --- /dev/null +++ b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusClient.java @@ -0,0 +1,95 @@ +package com.alibaba.datax.plugin.writer.milvuswriter; + +import java.util.List; + +import com.alibaba.datax.common.util.Configuration; + +import com.google.gson.JsonObject; +import io.milvus.v2.client.ConnectConfig; +import io.milvus.v2.client.MilvusClientV2; +import io.milvus.v2.service.collection.request.CreateCollectionReq; +import io.milvus.v2.service.collection.request.DropCollectionReq; +import io.milvus.v2.service.collection.request.HasCollectionReq; +import io.milvus.v2.service.partition.request.CreatePartitionReq; +import io.milvus.v2.service.partition.request.HasPartitionReq; +import io.milvus.v2.service.vector.request.InsertReq; +import io.milvus.v2.service.vector.request.UpsertReq; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; + +/** + * @author ziming(子茗) + * @date 12/27/24 + * @description + */ +@Slf4j +public class MilvusClient { + private MilvusClientV2 milvusClientV2; + + public MilvusClient(Configuration conf) { + // connect to milvus + ConnectConfig connectConfig = ConnectConfig.builder().uri(conf.getString(KeyConstant.ENDPOINT)).build(); + String token = null; + if (conf.getString(KeyConstant.TOKEN) != null) { + token = conf.getString(KeyConstant.TOKEN); + } else { + token = conf.getString(KeyConstant.USERNAME) + ":" + conf.getString(KeyConstant.PASSWORD); + } + connectConfig.setToken(token); + String database = conf.getString(KeyConstant.DATABASE); + if (StringUtils.isNotBlank(database)) { + log.info("use database {}", database); + connectConfig.setDbName(conf.getString(KeyConstant.DATABASE)); + } + Integer connectTimeOut = 
conf.getInt(KeyConstant.CONNECT_TIMEOUT_MS); + if (connectTimeOut != null) { + connectConfig.setConnectTimeoutMs(connectTimeOut); + } + this.milvusClientV2 = new MilvusClientV2(connectConfig); + } + + public void upsert(String collection, String partition, List data) { + UpsertReq upsertReq = UpsertReq.builder().collectionName(collection).data(data).build(); + if (StringUtils.isNotEmpty(partition)) { + upsertReq.setPartitionName(partition); + } + milvusClientV2.upsert(upsertReq); + } + + public void insert(String collection, String partition, List data) { + InsertReq insertReq = InsertReq.builder().collectionName(collection).data(data).build(); + if (StringUtils.isNotEmpty(partition)) { + insertReq.setPartitionName(partition); + } + milvusClientV2.insert(insertReq); + } + + public Boolean hasCollection(String collection) { + HasCollectionReq build = HasCollectionReq.builder().collectionName(collection).build(); + return milvusClientV2.hasCollection(build); + } + + public void createCollection(String collection, CreateCollectionReq.CollectionSchema schema) { + CreateCollectionReq createCollectionReq = CreateCollectionReq.builder().collectionName(collection).collectionSchema(schema).build(); + milvusClientV2.createCollection(createCollectionReq); + } + + public void dropCollection(String collection) { + DropCollectionReq request = DropCollectionReq.builder().collectionName(collection).build(); + milvusClientV2.dropCollection(request); + } + public Boolean hasPartition(String collection, String partition) { + HasPartitionReq hasPartitionReq = HasPartitionReq.builder().collectionName(collection).partitionName(partition).build(); + return milvusClientV2.hasPartition(hasPartitionReq); + } + + public void createPartition(String collectionName, String partitionName) { + CreatePartitionReq createPartitionReq = CreatePartitionReq.builder().collectionName(collectionName).partitionName(partitionName).build(); + milvusClientV2.createPartition(createPartitionReq); + } + + public 
void close() { + log.info("Closing Milvus client"); + milvusClientV2.close(); + } +} diff --git a/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusColumn.java b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusColumn.java new file mode 100644 index 0000000000..06070248a9 --- /dev/null +++ b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusColumn.java @@ -0,0 +1,112 @@ +package com.alibaba.datax.plugin.writer.milvuswriter; + +import io.milvus.v2.common.DataType; + +import java.util.Arrays; + +/** + * @author ziming(子茗) + * @date 12/27/24 + * @description + */ +public class MilvusColumn { + private String name; + private String type; + private DataType milvusTypeEnum; + private Boolean isPrimaryKey; + private Integer dimension; + private Boolean isPartitionKey; + private Integer maxLength; + private Boolean isAutoId; + private Integer maxCapacity; + private String elementType; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + for (DataType item : DataType.values()) { + if (item.name().equalsIgnoreCase(type)) { + this.milvusTypeEnum = item; + break; + } + } + if (this.milvusTypeEnum == null) { + throw new RuntimeException("Unsupported type: " + type + " supported types: " + Arrays.toString(DataType.values())); + } + } + + public Integer getDimension() { + return dimension; + } + + public void setDimension(Integer dimension) { + this.dimension = dimension; + } + + public Integer getMaxLength() { + return maxLength; + } + + public void setMaxLength(Integer maxLength) { + this.maxLength = maxLength; + } + + public Boolean getPrimaryKey() { + return isPrimaryKey; + } + + public Boolean getPartitionKey() { + return isPartitionKey; + } + + public void setPartitionKey(Boolean partitionKey) { + isPartitionKey = 
partitionKey; + } + + public void setPrimaryKey(Boolean primaryKey) { + isPrimaryKey = primaryKey; + } + + public Boolean getAutoId() { + return isAutoId; + } + + public void setAutoId(Boolean autoId) { + isAutoId = autoId; + } + + public Integer getMaxCapacity() { + return maxCapacity; + } + + public void setMaxCapacity(Integer maxCapacity) { + this.maxCapacity = maxCapacity; + } + + public String getElementType() { + return elementType; + } + + public void setElementType(String elementType) { + this.elementType = elementType; + } + + public DataType getMilvusTypeEnum() { + return milvusTypeEnum; + } + + public void setMilvusTypeEnum(DataType milvusTypeEnum) { + this.milvusTypeEnum = milvusTypeEnum; + } +} diff --git a/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusCreateCollection.java b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusCreateCollection.java new file mode 100644 index 0000000000..84e296f4d6 --- /dev/null +++ b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusCreateCollection.java @@ -0,0 +1,102 @@ +package com.alibaba.datax.plugin.writer.milvuswriter; + +import java.util.List; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.writer.milvuswriter.enums.SchemaCreateModeEnum; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; + +import io.milvus.v2.common.DataType; +import io.milvus.v2.service.collection.request.AddFieldReq; +import io.milvus.v2.service.collection.request.CreateCollectionReq; +import lombok.extern.slf4j.Slf4j; + +import static io.milvus.v2.common.DataType.valueOf; + +/** + * @author ziming(子茗) + * @date 12/27/24 + * @description + */ +@Slf4j +public class MilvusCreateCollection { + + private Configuration config; + + MilvusCreateCollection(Configuration originalConfig) { + this.config = originalConfig; + } + + public void createCollectionByMode(MilvusClient milvusClient) { + 
String collection = this.config.getString(KeyConstant.COLLECTION); + SchemaCreateModeEnum schemaCreateMode = SchemaCreateModeEnum.getEnum(this.config.getString(KeyConstant.SCHAME_CREATE_MODE)); + List milvusColumnMeta = JSON.parseObject(config.getString(KeyConstant.COLUMN), new TypeReference>() { + }); + Boolean hasCollection = milvusClient.hasCollection(collection); + if (schemaCreateMode == SchemaCreateModeEnum.CREATEIFNOTEXIT) { + // create collection + if (hasCollection) { + log.info("collection[{}] already exists, continue create", collection); + } else { + log.info("creating collection[{}]", collection); + CreateCollectionReq.CollectionSchema collectionSchema = prepareCollectionSchema(milvusColumnMeta); + milvusClient.createCollection(collection, collectionSchema); + } + } else if (schemaCreateMode == SchemaCreateModeEnum.RECREATE) { + if (hasCollection) { + log.info("collection already exist, try to drop"); + milvusClient.dropCollection(collection); + } + log.info("creating collection[{}]", collection); + CreateCollectionReq.CollectionSchema collectionSchema = prepareCollectionSchema(milvusColumnMeta); + milvusClient.createCollection(collection, collectionSchema); + } else if (schemaCreateMode == SchemaCreateModeEnum.IGNORE && !hasCollection) { + log.error("Collection not exist, throw exception"); + throw new RuntimeException("Collection not exist"); + } + } + + private CreateCollectionReq.CollectionSchema prepareCollectionSchema(List milvusColumnMeta) { + CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder().build(); + for (int i = 0; i < milvusColumnMeta.size(); i++) { + MilvusColumn milvusColumn = milvusColumnMeta.get(i); + AddFieldReq addFieldReq = AddFieldReq.builder() + .fieldName(milvusColumn.getName()) + .dataType(valueOf(milvusColumn.getType())) + .build(); + if (milvusColumn.getPrimaryKey() != null) { + addFieldReq.setIsPrimaryKey(milvusColumn.getPrimaryKey()); + } + if (milvusColumn.getDimension() 
!= null) { + addFieldReq.setDimension(milvusColumn.getDimension()); + } + if (milvusColumn.getPartitionKey() != null) { + addFieldReq.setIsPartitionKey(milvusColumn.getPartitionKey()); + } + if (milvusColumn.getMaxLength() != null) { + addFieldReq.setMaxLength(milvusColumn.getMaxLength()); + } + if (milvusColumn.getAutoId() != null) { + addFieldReq.setAutoID(milvusColumn.getAutoId()); + } + if (milvusColumn.getMaxCapacity() != null) { + addFieldReq.setMaxCapacity(milvusColumn.getMaxCapacity()); + } + if (milvusColumn.getElementType() != null) { + addFieldReq.setElementType(DataType.valueOf(milvusColumn.getElementType())); + } + try { + collectionSchema.addField(addFieldReq); + } catch (Exception e) { + log.error("add filed[{}] error", milvusColumn.getName()); + throw e; + } + } + Boolean enableDynamic = config.getBool(KeyConstant.ENABLE_DYNAMIC_SCHEMA); + if (enableDynamic != null) { + collectionSchema.setEnableDynamicField(enableDynamic); + } + return collectionSchema; + } +} diff --git a/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusWriter.java b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusWriter.java new file mode 100644 index 0000000000..764a9d943e --- /dev/null +++ b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusWriter.java @@ -0,0 +1,110 @@ +package com.alibaba.datax.plugin.writer.milvuswriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; + +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.List; + +@Slf4j +public class MilvusWriter extends Writer { + public static class Job extends Writer.Job { + private Configuration originalConfig = null; + + @Override + public void init() { + this.originalConfig = 
super.getPluginJobConf(); + originalConfig.getNecessaryValue(KeyConstant.ENDPOINT, MilvusWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(KeyConstant.COLUMN, MilvusWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(KeyConstant.COLLECTION, MilvusWriterErrorCode.REQUIRED_VALUE); + } + + @Override + public void prepare() { + //collection create process + MilvusClient milvusClient = new MilvusClient(originalConfig); + try { + MilvusCreateCollection milvusCreateCollection = new MilvusCreateCollection(originalConfig); + milvusCreateCollection.createCollectionByMode(milvusClient); + String collection = originalConfig.getString(KeyConstant.COLLECTION); + String partition = originalConfig.getString(KeyConstant.PARTITION); + if (partition != null && !milvusClient.hasPartition(collection, partition)) { + log.info("collection[{}] not contain partition[{}],try to create partition", collection, partition); + milvusClient.createPartition(collection, partition); + } + } catch (Exception e) { + throw DataXException.asDataXException(MilvusWriterErrorCode.MILVUS_COLLECTION, e.getMessage(), e); + } finally { + milvusClient.close(); + } + } + + /** + * 切分任务。 + * + * @param mandatoryNumber 为了做到Reader、Writer任务数对等,这里要求Writer插件必须按照源端的切分数进行切分。否则框架报错! 
+ */ + @Override + public List split(int mandatoryNumber) { + List configList = new ArrayList<>(); + for (int i = 0; i < mandatoryNumber; i++) { + configList.add(this.originalConfig.clone()); + } + return configList; + } + + @Override + public void destroy() { + + } + } + + public static class Task extends Writer.Task { + + private MilvusBufferWriter milvusBufferWriter; + MilvusClient milvusClient; + + @Override + public void init() { + log.info("Initializing Milvus writer"); + // get configuration + Configuration writerSliceConfig = this.getPluginJobConf(); + this.milvusClient = new MilvusClient(writerSliceConfig); + this.milvusBufferWriter = new MilvusBufferWriter(this.milvusClient, writerSliceConfig); + log.info("Milvus writer initialized"); + } + + @Override + public void startWrite(RecordReceiver lineReceiver) { + Record record = null; + while ((record = lineReceiver.getFromReader()) != null) { + milvusBufferWriter.add(record, this.getTaskPluginCollector()); + if (milvusBufferWriter.needCommit()) { + log.info("begin committing data size[{}]", milvusBufferWriter.getDataCacheSize()); + milvusBufferWriter.commit(); + } + } + if (milvusBufferWriter.getDataCacheSize() > 0) { + log.info("begin committing data size[{}]", milvusBufferWriter.getDataCacheSize()); + milvusBufferWriter.commit(); + } + } + + @Override + public void prepare() { + super.prepare(); + } + + @Override + public void destroy() { + if (this.milvusClient != null) { + this.milvusClient.close(); + } + } + } +} \ No newline at end of file diff --git a/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusWriterErrorCode.java b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusWriterErrorCode.java new file mode 100644 index 0000000000..7264160fd9 --- /dev/null +++ b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/MilvusWriterErrorCode.java @@ -0,0 +1,35 @@ +package com.alibaba.datax.plugin.writer.milvuswriter; + +import 
com.alibaba.datax.common.spi.ErrorCode; + +/** + * @author ziming(子茗) + * @date 12/27/24 + * @description + */ +public enum MilvusWriterErrorCode implements ErrorCode { + MILVUS_COLLECTION("MilvusWriter-01", "collection process error"), + REQUIRED_VALUE("MilvusWriter-02", "miss required parameter"); + private final String code; + private final String description; + + MilvusWriterErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. ", this.code, this.description); + } +} diff --git a/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/enums/SchemaCreateModeEnum.java b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/enums/SchemaCreateModeEnum.java new file mode 100644 index 0000000000..b8c88bf1b3 --- /dev/null +++ b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/enums/SchemaCreateModeEnum.java @@ -0,0 +1,34 @@ +package com.alibaba.datax.plugin.writer.milvuswriter.enums; + +import lombok.extern.slf4j.Slf4j; + +/** + * @author ziming(子茗) + * @date 12/27/24 + * @description + */ +@Slf4j +public enum SchemaCreateModeEnum { + CREATEIFNOTEXIT("createIfNotExist"), + IGNORE("ignore"), + RECREATE("recreate"); + String type; + + SchemaCreateModeEnum(String type) { + this.type = type; + } + + public String getType() { + return type; + } + + public static SchemaCreateModeEnum getEnum(String name) { + for (SchemaCreateModeEnum value : SchemaCreateModeEnum.values()) { + if (value.getType().equalsIgnoreCase(name)) { + return value; + } + } + log.info("use default CREATEIFNOTEXIT schame create mode"); + return CREATEIFNOTEXIT; + } +} \ No newline at end of file diff --git 
a/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/enums/WriteModeEnum.java b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/enums/WriteModeEnum.java new file mode 100644 index 0000000000..0098dbaddd --- /dev/null +++ b/milvuswriter/src/main/java/com/alibaba/datax/plugin/writer/milvuswriter/enums/WriteModeEnum.java @@ -0,0 +1,28 @@ +package com.alibaba.datax.plugin.writer.milvuswriter.enums; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public enum WriteModeEnum { + INSERT("insert"), + UPSERT("upsert"); + String mode; + + public String getMode() { + return mode; + } + + WriteModeEnum(String mode) { + this.mode = mode; + } + + public static WriteModeEnum getEnum(String mode) { + for (WriteModeEnum writeModeEnum : WriteModeEnum.values()) { + if (writeModeEnum.getMode().equalsIgnoreCase(mode)) { + return writeModeEnum; + } + } + log.info("use default write mode upsert"); + return UPSERT; + } +} diff --git a/milvuswriter/src/main/resources/plugin.json b/milvuswriter/src/main/resources/plugin.json new file mode 100644 index 0000000000..8b91230934 --- /dev/null +++ b/milvuswriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "milvuswriter", + "class": "com.alibaba.datax.plugin.writer.milvuswriter.MilvusWriter", + "description": "useScene: prod. 
mechanism: via milvusclient connect milvus write data concurrent.", + "developer": "nianliuu" +} diff --git a/milvuswriter/src/main/resources/plugin_job_template.json b/milvuswriter/src/main/resources/plugin_job_template.json new file mode 100644 index 0000000000..33bd941a52 --- /dev/null +++ b/milvuswriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,12 @@ +{ + "name": "milvuswriter", + "parameter": { + "endpoint": "", + "username": "", + "password": "", + "database": "", + "collection": "", + "column": [], + "enableDynamicSchema": "" + } +} \ No newline at end of file diff --git a/mysqlwriter/doc/mysqlwriter.md b/mysqlwriter/doc/mysqlwriter.md index 5368775cc1..294846c2e6 100644 --- a/mysqlwriter/doc/mysqlwriter.md +++ b/mysqlwriter/doc/mysqlwriter.md @@ -6,7 +6,7 @@ ## 1 快速介绍 -MysqlWriter 插件实现了写入数据到 Mysql 主库的目的表的功能。在底层实现上, MysqlWriter 通过 JDBC 连接远程 Mysql 数据库,并执行相应的 insert into ... 或者 ( replace into ...) 的 sql 语句将数据写入 Mysql,内部会分批次提交入库,需要数据库本身采用 innodb 引擎。 +MysqlWriter 插件实现了写入数据到 Mysql 主库的目的表的功能。在底层实现上, MysqlWriter 通过 JDBC 连接远程 Mysql 数据库,并执行相应的 insert into ... 或者 ( replace into ...) 的 sql 语句将数据写入 Mysql,内部会分批次提交入库,需要数据库本身采用 InnoDB 引擎。 MysqlWriter 面向ETL开发工程师,他们使用 MysqlWriter 从数仓导入数据到 Mysql。同时 MysqlWriter 亦可以作为数据迁移工具为DBA等用户提供服务。 diff --git a/obhbasereader/doc/obhbasereader.md b/obhbasereader/doc/obhbasereader.md new file mode 100644 index 0000000000..675f6ce795 --- /dev/null +++ b/obhbasereader/doc/obhbasereader.md @@ -0,0 +1,178 @@ +OceanBase的table api为应用提供了ObHBase的访问接口,因此,OceanBase的table api的reader与HBase Reader的结构和配置方法类似。 +obhbasereader插件支持sql和hbase api两种读取方式,两种方式存在如下区别: + +1. sql方式可以按照分区或者K值进行数据切片,而hbase api方式的数据切片需要用户手动设置。 +2. sql方式会将从obhbase读取的kqtv形式的数据转换为单一横行,而hbase api则不做行列转换,直接以kqtv形式将数据传递给下游。 +3. sql方式需要配置column属性,hbase api则不需要配置,数据均为固定的kqtv四列。 +4. 
sql方式仅支持获取获得最新或者最旧版本的数据,而hbase api支持获得多版本数据。 +#### 脚本配置 +```json +{ + "job": { + "setting": { + "speed": { + "channel": 3, + "byte": 104857600 + }, + "errorLimit": { + "record": 10 + } + }, + "content": [ + { + "reader": { + "name": "obhbasereader", + "parameter": { + "username": "username", + "password": "password", + "encoding": "utf8", + "column": [ + { + "name": "f1:column1_1", + "type": "string" + }, + { + "name": "f1:column2_2", + "type": "string" + }, + { + "name": "f1:column1_1", + "type": "string" + }, + { + "name": "f1:column2_2", + "type": "string" + } + ], + "range": [ + { + "startRowkey": "aaa", + "endRowkey": "ccc", + "isBinaryRowkey": false + }, + { + "startRowkey": "eee", + "endRowkey": "zzz", + "isBinaryRowkey": false + } + ], + "mode": "normal", + "readByPartition": "true", + "scanCacheSize": "", + "readerHint": "", + "readBatchSize": "1000", + "connection": [ + { + "table": [ + "htable1", + "htable2" + ], + "jdbcUrl": [ + "||_dsc_ob10_dsc_||集群:租户||_dsc_ob10_dsc_||jdbc:mysql://ip:port/dbName1" + ], + "username": "username", + "password": "password" + }, + { + "table": [ + "htable1", + "htable2" + ], + "jdbcUrl": [ + "jdbc:mysql://ip:port/database" + ] + } + ] + } + }, + "writer": { + "name": "txtfilewriter", + "parameter": { + "path": "/Users/xujing/datax/txtfile", + "charset": "UTF-8", + "fieldDelimiter": ",", + "fileName": "hbase", + "nullFormat": "null", + "writeMode": "truncate" + } + } + } + ] + } +} +``` +##### 参数解释 + +- **connection** + - 描述:配置分库分表的jdbcUrl和分表名。如果一个分库中有多个分表可以用逗号隔开,也可以写成表名[起始序号-截止序号] + - 必须:是 + - 默认值:无 +- **jdbcUrl** + - 描述:连接ob使用的jdbc url,支持如下两种格式: + - jdbc:mysql://obproxyIp:obproxyPort/db + - 此格式下username需要写成三段式格式 + - ||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/db + - 此格式下username仅填写用户名本身,无需三段式写法 + + - 必选:是 + - 默认值:无 +- **table** + - 描述:所选取的需要同步的表。使用JSON的数组描述,因此支持多张表同时抽取。当配置为多张表时,用户自己需保证多张表是同一schema结构,obhbasereader不予检查表是否同一逻辑表。注意,table必须包含在connection配置单元中。 + - 必选:是 + - 默认值:无 +- 
**readByPartition** + - 描述:使用sql方式读取时,配置**仅**按照分区进行切片。 + - 必须:否 + - 默认值:false +- **partitionName** + - 描述:使用sql方式读取时,标识仅读取指定分区名的数据,用户需要保证配置的分区名在表结构中真实存在(要求严格大小写)。 + - 必须:否 + - 默认值:无 +- **readBatchSize** + - 描述:使用sql方式读取时,分页大小。 + - 必须:否 + - 默认值:10w +- **fetchSize** + - 描述:使用sql方式读取时,控制每次读取数据时从结果集中获取的数据行数。 + - 必须:否 + - 默认值:-2147483648 +- **scanCacheSize** + - 描述:使用hbase api读取时,每次rpc从服务器端读取的行数 + - 必须:否 + - 默认值:256 +- **readerHint** + - 描述:obhbasereader使用sql方式读取时使用的hint + - 必须:否 + - 默认值:/*+READ_CONSISTENCY(weak),QUERY_TIMEOUT(86400000000)*/ +- **column** + - 描述:使用sql方式读取数据时,所配置的表中需要同步的列名集合,使用JSON的数组描述字段信息。 + - 支持列裁剪,即列可以挑选部分列进行导出。 +``` +支持列换序,即列可以不按照表schema信息进行导出,同时支持通配符*,在使用之前需仔细核对列信息。 +``` + +- 必选:sql方式读取时必选 + - 默认值:无 +- **range** + - 描述**:**指定hbasereader读取的rowkey范围 + - 必须:否 + - 默认值:无 +- **username** + - 描述:访问OceanBase的用户名 + - 必选:是 + - 默认值:无 +- **mode** + - 描述:读取obhbase的模式,normal 模式,即仅读取一个版本的数据。 + - 必选:是 + - 默认值:normal +- **version** + - 描述:读取obhbase的版本,当前支持oldest、latest模式,分别表示读取最旧和最新的数据。 + - 必须:是 + - 默认值:oldest + +一些注意点: +注:如果配置了**partitionName**,则无需再配置readByPartition,即便配置了也会忽略readByPartition选项,而是仅会读取指定分区的数据。 +注:如果配置了**readByPartition**,任务将仅按照分区切分任务,而不会再按照K值进行切分。如果是非分区表,则整张表会被当作一个任务而不会再切分。 + + + diff --git a/obhbasereader/pom.xml b/obhbasereader/pom.xml new file mode 100755 index 0000000000..7c8ffb1e5c --- /dev/null +++ b/obhbasereader/pom.xml @@ -0,0 +1,153 @@ + + 4.0.0 + + com.alibaba.datax + datax-all + 0.0.1-SNAPSHOT + + + obhbasereader + com.alibaba.datax + obhbasereader + 0.0.1-SNAPSHOT + + + + com.alibaba.datax + datax-core + ${datax-project-version} + provided + + + com.alibaba.datax + oceanbasev10reader + 0.0.1-SNAPSHOT + + + guava + com.google.guava + + + + + org.apache.zookeeper + zookeeper + 3.3.2 + + + log4j + log4j + + + + + commons-collections + commons-collections + 3.2.1 + + + + + + + + + + com.oceanbase + obkv-hbase-client + 0.1.4.2 + + + guava + com.google.guava + + + + + + com.google.guava + guava + ${guava-version} + + + + org.json + json + 
20160810 + + + junit + junit + 4.11 + test + + + org.powermock + powermock-module-junit4 + 1.4.10 + test + + + org.powermock + powermock-api-mockito + 1.4.10 + test + + + org.mockito + mockito-core + 1.8.5 + test + + + + + + + + src/main/java + + **/*.properties + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + + diff --git a/obhbasereader/src/main/assembly/package.xml b/obhbasereader/src/main/assembly/package.xml new file mode 100755 index 0000000000..43da622d5c --- /dev/null +++ b/obhbasereader/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/reader/obhbasereader + + + target/ + + obhbasereader-0.0.1-SNAPSHOT.jar + + plugin/reader/obhbasereader + + + + + + false + plugin/reader/obhbasereader/libs + runtime + + + diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Constant.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Constant.java new file mode 100755 index 0000000000..40dd32d282 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Constant.java @@ -0,0 +1,34 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +import ch.qos.logback.classic.Level; + +public final class Constant { + public static final String ROWKEY_FLAG = "rowkey"; + public static final int DEFAULT_SCAN_CACHE = 256; + public static final int DEFAULT_FETCH_SIZE = Integer.MIN_VALUE; + public static final int DEFAULT_READ_BATCH_SIZE = 100000; + // timeout:24 * 3600 = 86400s + public static final String OB_READ_HINT = "/*+READ_CONSISTENCY(weak),QUERY_TIMEOUT(86400000000)*/"; + public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; + public static final String DEFAULT_ENCODING = "UTF-8"; + public 
static final String DEFAULT_TIMEZONE = "UTC"; + public static final boolean DEFAULT_USE_SQLREADER = true; + public static final boolean DEFAULT_USE_ODPMODE = true; + public static final String OB_TABLE_CLIENT_PROPERTY = "logging.path.com.alipay.oceanbase-table-client"; + public static final String OB_TABLE_HBASE_PROPERTY = "logging.path.com.alipay.oceanbase-table-hbase"; + public static final String OB_TABLE_CLIENT_LOG_LEVEL = "logging.level.oceanbase-table-client"; + public static final String OB_TABLE_HBASE_LOG_LEVEL = "logging.level.oceanbase-table-hbase"; + public static final String OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-client"; + public static final String OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-hbase"; + public static final String OB_HBASE_LOG_PATH = System.getProperty("datax.home") + "/log/"; + public static final String DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL = Level.OFF.toString(); + public static final String DEFAULT_OB_TABLE_HBASE_LOG_LEVEL = Level.OFF.toString(); + public static final String OBMYSQL_KEYWORDS = + 
"CUME_DIST,DENSE_RANK,EMPTY,FIRST_VALUE,GROUPING,GROUPS,INTERSECT,JSON_TABLE,LAG,LAST_VALUE,LATERAL,LEAD,NTH_VALUE,NTILE,OF,OVER,PERCENT_RANK,RANK,RECURSIVE,ROW_NUMBER,SYSTEM,WINDOW,ACCESSIBLE,ACCOUNT,ACTION,ADD,AFTER,AGAINST,AGGREGATE,ALGORITHM,ALL,ALTER,ALWAYS,ANALYSE,AND,ANY,AS,ASC,ASCII,ASENSITIVE,AT,AUTO_INCREMENT,AUTOEXTEND_SIZE,AVG,AVG_ROW_LENGTH,BACKUP,BEFORE,BEGIN,BETWEEN,BIGINT,BINARY,BINLOG,BIT,BLOB,BLOCK,BOOL,BOOLEAN,BOTH,BTREE,BY,BYTE,CACHE,CALL,CASCADE,CASCADED,CASE,CATALOG_NAME,CHAIN,CHANGE,CHANGED,CHANNEL,CHAR,CHARACTER,CHARSET,CHECK,CHECKSUM,CIPHER,CLASS_ORIGIN,CLIENT,CLOSE,COALESCE,CODE,COLLATE,COLLATION,COLUMN,COLUMN_FORMAT,COLUMN_NAME,COLUMNS,COMMENT,COMMIT,COMMITTED,COMPACT,COMPLETION,COMPRESSED,COMPRESSION,CONCURRENT,CONDITION,CONNECTION,CONSISTENT,CONSTRAINT,CONSTRAINT_CATALOG,CONSTRAINT_NAME,CONSTRAINT_SCHEMA,CONTAINS,CONTEXT,CONTINUE,CONVERT,CPU,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR," + + "CURSOR_NAME,DATA,DATABASE,DATABASES,DATAFILE,DATE,DATETIME,DAY,DAY_HOUR,DAY_MICROSECOND,DAY_MINUTE,DAY_SECOND,DEALLOCATE,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_AUTH,DEFINER,DELAY_KEY_WRITE,DELAYED,DELETE,DES_KEY_FILE,DESC,DESCRIBE,DETERMINISTIC,DIAGNOSTICS,DIRECTORY,DISABLE,DISCARD,DISK,DISTINCT,DISTINCTROW,DIV,DO,DOUBLE,DROP,DUAL,DUMPFILE,DUPLICATE,DYNAMIC,EACH,ELSE,ELSEIF,ENABLE,ENCLOSED,ENCRYPTION,END,ENDS,ENGINE,ENGINES,ENUM,ERROR,ERRORS,ESCAPE,ESCAPED,EVENT,EVENTS,EVERY,EXCHANGE,EXECUTE,EXISTS,EXIT,EXPANSION,EXPIRE,EXPLAIN,EXPORT,EXTENDED,EXTENT_SIZE,FAST,FAULTS,FETCH,FIELDS,FILE,FILE_BLOCK_SIZE,FILTER,FIRST,FIXED,FLOAT,FLOAT4,FLOAT8,FLUSH,FOLLOWS,FOR,FORCE,FOREIGN,FORMAT,FOUND,FROM,FULL,FULLTEXT,FUNCTION,GENERAL,GENERATED,GEOMETRY,GEOMETRYCOLLECTION,GET,GET_FORMAT,GLOBAL,GRANT,GRANTS,GROUP,GROUP_REPLICATION,HANDLER,HASH,HAVING,HELP,HIGH_PRIORITY,HOST,HOSTS,HOUR,HOUR_MICROSECOND,HOUR_MINUTE,HOUR_SECOND,IDENTIFIED,IF,IGNORE,IGNORE_SERVER_IDS,IMPORT,IN,INDEX," + + "INDEXES," + 
"INFILE,INITIAL_SIZE,INNER,INOUT,INSENSITIVE,INSERT,INSERT_METHOD,INSTALL,INSTANCE,INT,INT1,INT2,INT3,INT4,INT8,INTEGER,INTERVAL,INTO,INVOKE,INVOKER,IO,IO_AFTER_GTIDS,IO_BEFORE_GTIDS,IO_THREAD,IPC,IS,ISOLATION,ISSUER,ITERATE,JOIN,JSON,KEY,KEY_BLOCK_SIZE,KEYS,KILL,LANGUAGE,LAST,LEADING,LEAVE,LEAVES,LEFT,LESS,LEVEL,LIKE,LIMIT,LINEAR,LINES,LINESTRING,LIST,LOAD,LOCAL,LOCALTIME,LOCALTIMESTAMP,LOCK,LOCKS,LOGFILE,LOGS,LONG,LONGBLOB,LONGTEXT,LOOP,LOW_PRIORITY,MASTER,MASTER_AUTO_POSITION,MASTER_BIND,MASTER_CONNECT_RETRY,MASTER_DELAY,MASTER_HEARTBEAT_PERIOD,MASTER_HOST,MASTER_LOG_FILE,MASTER_LOG_POS,MASTER_PASSWORD,MASTER_PORT,MASTER_RETRY_COUNT,MASTER_SERVER_ID,MASTER_SSL,MASTER_SSL_CA,MASTER_SSL_CAPATH,MASTER_SSL_CERT,MASTER_SSL_CIPHER,MASTER_SSL_CRL,MASTER_SSL_CRLPATH,MASTER_SSL_KEY,MASTER_SSL_VERIFY_SERVER_CERT,MASTER_TLS_VERSION,MASTER_USER,MATCH,MAX_CONNECTIONS_PER_HOUR,MAX_QUERIES_PER_HOUR,MAX_ROWS,MAX_SIZE,MAX_STATEMENT_TIME,MAX_UPDATES_PER_HOUR," + + "MAX_USER_CONNECTIONS," + + "MAXVALUE,MEDIUM,MEDIUMBLOB,MEDIUMINT,MEDIUMTEXT,MEMORY,MERGE,MESSAGE_TEXT,MICROSECOND,MIDDLEINT,MIGRATE,MIN_ROWS,MINUTE,MINUTE_MICROSECOND,MINUTE_SECOND,MOD,MODE,MODIFIES,MODIFY,MONTH,MULTILINESTRING,MULTIPOINT,MULTIPOLYGON,MUTEX,MYSQL_ERRNO,NAME,NAMES,NATIONAL,NATURAL,NCHAR,NDB,NDBCLUSTER,NEVER,NEW,NEXT,NO,NO_WAIT,NO_WRITE_TO_BINLOG,NODEGROUP,NONBLOCKING,NONE,NOT,NUMBER,NUMERIC,NVARCHAR,OFFSET,OLD_PASSWORD,ON,ONE,ONLY,OPEN,OPTIMIZE,OPTIMIZER_COSTS,OPTION,OPTIONALLY,OPTIONS,OR,ORDER,OUT,OUTER,OUTFILE,OWNER,PACK_KEYS,PAGE,PARSE_GCOL_EXPR,PARSER,PARTIAL,PARTITION,PARTITIONING,PARTITIONS,PASSWORD,PHASE,PLUGIN,PLUGIN_DIR,PLUGINS,POINT,POLYGON,PORT,PRECEDES,PRECISION,PREPARE,PRESERVE,PREV,PRIMARY,PRIVILEGES,PROCEDURE,PROCESSLIST,PROFILE,PROFILES,PROXY,PURGE,QUARTER,QUERY,QUICK,RANGE,READ,READ_ONLY,READ_WRITE,READS,REAL,REBUILD,RECOVER,REDO_BUFFER_SIZE,REDOFILE,REDUNDANT,REFERENCES,REGEXP,RELAY,RELAY_LOG_FILE,RELAY_LOG_POS,RELAY_THREAD,RELAYLOG,RELEASE,RELOAD,REMOVE," + + 
"RENAME,REORGANIZE,REPAIR,REPEAT,REPEATABLE,REPLACE,REPLICATE_DO_DB,REPLICATE_DO_TABLE,REPLICATE_IGNORE_DB,REPLICATE_IGNORE_TABLE,REPLICATE_REWRITE_DB,REPLICATE_WILD_DO_TABLE,REPLICATE_WILD_IGNORE_TABLE,REPLICATION,REQUIRE,RESET,RESIGNAL,RESTORE,RESTRICT,RESUME,RETURN,RETURNED_SQLSTATE,RETURNS,REVERSE,REVOKE,RIGHT,RLIKE,ROLLBACK,ROLLUP,ROTATE,ROUTINE,ROW,ROW_COUNT,ROW_FORMAT,ROWS,RTREE,SAVEPOINT,SCHEDULE,SCHEMA,SCHEMA_NAME,SCHEMAS,SECOND,SECOND_MICROSECOND,SECURITY,SELECT,SENSITIVE,SEPARATOR,SERIAL,SERIALIZABLE,SERVER,SESSION,SET,SHARE,SHOW,SHUTDOWN,SIGNAL,SIGNED,SIMPLE,SLAVE,SLOW,SMALLINT,SNAPSHOT,SOCKET,SOME,SONAME,SOUNDS,SOURCE,SPATIAL,SPECIFIC,SQL,SQL_AFTER_GTIDS,SQL_AFTER_MTS_GAPS,SQL_BEFORE_GTIDS,SQL_BIG_RESULT,SQL_BUFFER_RESULT,SQL_CACHE,SQL_CALC_FOUND_ROWS,SQL_NO_CACHE,SQL_SMALL_RESULT,SQL_THREAD,SQL_TSI_DAY,SQL_TSI_HOUR,SQL_TSI_MINUTE,SQL_TSI_MONTH,SQL_TSI_QUARTER,SQL_TSI_SECOND,SQL_TSI_WEEK,SQL_TSI_YEAR,SQLEXCEPTION,SQLSTATE,SQLWARNING,SSL,STACKED," + + "START," + "STARTING,STARTS,STATS_AUTO_RECALC,STATS_PERSISTENT,STATS_SAMPLE_PAGES,STATUS,STOP,STORAGE,STORED,STRAIGHT_JOIN,STRING,SUBCLASS_ORIGIN,SUBJECT,SUBPARTITION,SUBPARTITIONS,SUPER,SUSPEND,SWAPS,SWITCHES,TABLE,TABLE_CHECKSUM,TABLE_NAME,TABLES,TABLESPACE,TEMPORARY,TEMPTABLE,TERMINATED,TEXT,THAN,THEN,TIME,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TINYBLOB,TINYINT,TINYTEXT,TO,TRAILING,TRANSACTION,TRIGGER,TRIGGERS,TRUNCATE,TYPE,TYPES,UNCOMMITTED,UNDEFINED,UNDO,UNDO_BUFFER_SIZE,UNDOFILE,UNICODE,UNINSTALL,UNION,UNIQUE,UNKNOWN,UNLOCK,UNSIGNED,UNTIL,UPDATE,UPGRADE,USAGE,USE,USE_FRM,USER,USER_RESOURCES,USING,UTC_DATE,UTC_TIME,UTC_TIMESTAMP,VALIDATION,VALUE,VALUES,VARBINARY,VARCHAR,VARCHARACTER,VARIABLES,VARYING,VIEW,VIRTUAL,WAIT,WARNINGS,WEEK,WEIGHT_STRING,WHEN,WHERE,WHILE,WITH,WITHOUT,WORK,WRAPPER,WRITE,X509,XA,XID,XML,XOR,YEAR,YEAR_MONTH,ZEROFILL,FALSE,TRUE"; +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HTableManager.java 
b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HTableManager.java new file mode 100755 index 0000000000..c36114fbb8 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HTableManager.java @@ -0,0 +1,19 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +import com.alipay.oceanbase.hbase.OHTable; +import org.apache.hadoop.conf.Configuration; + +import java.io.IOException; + +public final class HTableManager { + + public static OHTable createHTable(Configuration config, String tableName) throws IOException { + return new OHTable(config, tableName); + } + + public static void closeHTable(OHTable hTable) throws IOException { + if (hTable != null) { + hTable.close(); + } + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseColumnCell.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseColumnCell.java new file mode 100755 index 0000000000..1f794ae0aa --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseColumnCell.java @@ -0,0 +1,124 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +import com.alibaba.datax.common.base.BaseObject; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * 描述 hbasereader 插件中,column 配置中的一个单元项实体 + */ +public class HbaseColumnCell extends BaseObject { + private ColumnType columnType; + + // columnName 格式为:列族:列名 + private String columnName; + + private byte[] cf; + private byte[] qualifier; + + //对于常量类型,其常量值放到 columnValue 里 + private String columnValue; + + //当配置了 columnValue 时,isConstant=true(这个成员变量是用于方便使用本类的地方判断是否是常量类型字段) + private boolean isConstant; + + // 只在类型是时间类型时,才会设置该值,无默认值。形式如:yyyy-MM-dd 
HH:mm:ss + private String dateformat; + + private HbaseColumnCell(Builder builder) { + this.columnType = builder.columnType; + + //columnName 和 columnValue 必须有一个为 null + Validate.isTrue(builder.columnName == null || builder.columnValue == null, "In obhbasereader, column cannot configure both column name and column value. Choose one of them."); + + //columnName 和 columnValue 不能都为 null + Validate.isTrue(builder.columnName != null || builder.columnValue != null, "In obhbasereader, column cannot configure both column name and column value. Choose one of them."); + + if (builder.columnName != null) { + this.isConstant = false; + this.columnName = builder.columnName; + + // 如果 columnName 不是 rowkey,则必须配置为:列族:列名 格式 + if (!ObHbaseReaderUtil.isRowkeyColumn(this.columnName)) { + + String promptInfo = "In obhbasereader, the column configuration format of column should be: 'family:column'. The column you configured is wrong:" + this.columnName; + String[] cfAndQualifier = this.columnName.split(":"); + Validate.isTrue(cfAndQualifier.length == 2 && StringUtils.isNotBlank(cfAndQualifier[0]) && StringUtils.isNotBlank(cfAndQualifier[1]), promptInfo); + + this.cf = Bytes.toBytes(cfAndQualifier[0].trim()); + this.qualifier = Bytes.toBytes(cfAndQualifier[1].trim()); + } + } else { + this.isConstant = true; + this.columnValue = builder.columnValue; + } + + if (builder.dateformat != null) { + this.dateformat = builder.dateformat; + } + } + + public ColumnType getColumnType() { + return columnType; + } + + public String getColumnName() { + return columnName; + } + + public byte[] getCf() { + return cf; + } + + public byte[] getQualifier() { + return qualifier; + } + + public String getDateformat() { + return dateformat; + } + + public String getColumnValue() { + return columnValue; + } + + public boolean isConstant() { + return isConstant; + } + + // 内部 builder 类 + public static class Builder { + private ColumnType columnType; + private String columnName; + private String columnValue; + + 
private String dateformat; + + public Builder(ColumnType columnType) { + this.columnType = columnType; + } + + public Builder columnName(String columnName) { + this.columnName = columnName; + return this; + } + + public Builder columnValue(String columnValue) { + this.columnValue = columnValue; + return this; + } + + public Builder dateformat(String dateformat) { + this.dateformat = dateformat; + return this; + } + + public HbaseColumnCell build() { + return new HbaseColumnCell(this); + } + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseReaderErrorCode.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseReaderErrorCode.java new file mode 100755 index 0000000000..551b19b630 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/HbaseReaderErrorCode.java @@ -0,0 +1,36 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum HbaseReaderErrorCode implements ErrorCode { + REQUIRED_VALUE("ObHbaseReader-00", "Missing required parameters."), + ILLEGAL_VALUE("ObHbaseReader-01", "Illegal configuration."), + PREPAR_READ_ERROR("ObHbaseReader-02", "Preparing to read ObHBase error."), + SPLIT_ERROR("ObHbaseReader-03", "Splitting ObHBase table error."), + INIT_TABLE_ERROR("ObHbaseReader-04", "Initializing ObHBase extraction table error"), + PARSE_COLUMN_ERROR("ObHbaseReader-05", "Parse column failed."), + READ_ERROR("ObHbaseReader-06", "Read ObHBase error."); + + private final String code; + private final String description; + + private HbaseReaderErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. 
", this.code, this.description); + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Key.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Key.java new file mode 100755 index 0000000000..6415efd098 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/Key.java @@ -0,0 +1,103 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +public final class Key { + + public final static String HBASE_CONFIG = "hbaseConfig"; + + /** + * mode 可以取 normal 或者 multiVersionFixedColumn 或者 multiVersionDynamicColumn 三个值,无默认值。 + * + * normal 配合 column(Map 结构的)使用 + * + * multiVersionFixedColumn 配合 maxVersion,tetradType, column(List 结构的)使用 + * + * multiVersionDynamicColumn 配合 maxVersion,tetradType, columnFamily(List 结构的)使用 + */ + public final static String MODE = "mode"; + + /** + * 配合 mode = multiVersion 时使用,指明需要读取的版本个数。无默认值 + * -1 表示去读全部版本 + * 不能为0,1 + * >1 表示最多读取对应个数的版本数(不能超过 Integer 的最大值) + */ + public final static String MAX_VERSION = "maxVersion"; + + /** + * 多版本情况下,必须配置 四元组的类型(rowkey,column,timestamp,value) + */ + public final static String TETRAD_TYPE = "tetradType"; + + /** + * 默认为 utf8 + */ + public final static String ENCODING = "encoding"; + + public final static String TABLE = "table"; + + public final static String USERNAME = "username"; + + public final static String OB_SYS_USERNAME = "obSysUser"; + + public final static String CONFIG_URL = "obConfigUrl"; + + public final static String ODP_HOST = "odpHost"; + + public final static String ODP_PORT = "odpPort"; + + public final static String DB_NAME = "dbName"; + + public final static String PASSWORD = "password"; + + public final static String OB_SYS_PASSWORD = "obSysPassword"; + + public final static String COLUMN_FAMILY = "columnFamily"; + + public final static String COLUMN = "column"; + + public final static String START_ROWKEY = "startRowkey"; + + public final static String END_ROWKEY = "endRowkey"; + 
+ public final static String IS_BINARY_ROWKEY = "isBinaryRowkey"; + + public final static String SCAN_CACHE = "scanCache"; + + public final static String RS_URL = "rsUrl"; + + public final static String MAX_ACTIVE_CONNECTION = "maxActiveConnection"; + + public final static int DEFAULT_MAX_ACTIVE_CONNECTION = 2000; + + public final static String TIMEOUT = "timeout"; + + public final static long DEFAULT_TIMEOUT = 30; + + public final static String PARTITION_NAME = "partitionName"; + + public final static String JDBC_URL = "jdbcUrl"; + + public final static String TIMEZONE = "timezone"; + + public final static String FETCH_SIZE = "fetchSize"; + + public final static String READ_BATCH_SIZE = "readBatchSize"; + + public final static String SESSION = "session"; + + public final static String READER_HINT = "readerHint"; + + public final static String QUERY_SQL = "querySql"; + + public final static String SAMPLE_PERCENTAGE = "samplePercentage"; + // 是否使用独立密码 + public final static String USE_SPECIAL_SECRET = "useSpecialSecret"; + + public final static String USE_SQL_READER = "useSqlReader"; + + public final static String USE_ODP_MODE = "useOdpMode"; + + public final static String RANGE = "range"; + + public final static String READ_BY_PARTITION = "readByPartition"; +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_en_US.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_en_US.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_ja_JP.properties 
b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_CN.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_HK.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_TW.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/ObHbaseReader.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/ObHbaseReader.java new file mode 100755 index 0000000000..15472d6eaf --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/ObHbaseReader.java @@ -0,0 +1,445 @@ +package com.alibaba.datax.plugin.reader.obhbasereader; + +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_OB_TABLE_HBASE_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_ODPMODE; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL; +import static 
com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_HBASE_LOG_PATH; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_CLIENT_PROPERTY; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_HBASE_LOG_LEVEL; +import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_HBASE_PROPERTY; +import static org.apache.commons.lang3.StringUtils.EMPTY; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.spi.Reader; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.reader.Constant; +import com.alibaba.datax.plugin.rdbms.reader.util.ObVersion; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.TableExpandUtil; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType; +import com.alibaba.datax.plugin.reader.obhbasereader.ext.ServerConnectInfo; +import com.alibaba.datax.plugin.reader.obhbasereader.task.AbstractHbaseTask; +import com.alibaba.datax.plugin.reader.obhbasereader.task.SQLNormalModeReader; +import com.alibaba.datax.plugin.reader.obhbasereader.task.ScanMultiVersionReader; +import com.alibaba.datax.plugin.reader.obhbasereader.task.ScanNormalModeReader; +import com.alibaba.datax.plugin.reader.obhbasereader.util.HbaseSplitUtil; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; +import com.alibaba.datax.plugin.reader.obhbasereader.util.SqlReaderSplitUtil; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils; + +import com.google.common.base.Preconditions; +import java.sql.PreparedStatement; 
+import java.sql.ResultSet; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +/** + * ObHbaseReader 支持分库分表 + * 仅支持ob3.x及以上版本 + */ +public class ObHbaseReader extends Reader { + + public static class Job extends Reader.Job { + static private final String ACCESS_DENIED_ERROR = "Access denied for user"; + private static Logger LOG = LoggerFactory.getLogger(ObHbaseReader.class); + private Configuration originalConfig; + + @Override + public void init() { + if (System.getProperty(OB_TABLE_CLIENT_PROPERTY) == null) { + LOG.info(OB_TABLE_CLIENT_PROPERTY + " not set"); + System.setProperty(OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH); + } + if (System.getProperty(OB_TABLE_HBASE_PROPERTY) == null) { + LOG.info(OB_TABLE_HBASE_PROPERTY + " not set"); + System.setProperty(OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH); + } + if (System.getProperty(OB_TABLE_CLIENT_LOG_LEVEL) == null) { + LOG.info(OB_TABLE_CLIENT_LOG_LEVEL + " not set"); + System.setProperty(OB_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL); + } + if (System.getProperty(OB_TABLE_HBASE_LOG_LEVEL) == null) { + LOG.info(OB_TABLE_HBASE_LOG_LEVEL + " not set"); + System.setProperty(OB_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL); + } + if (System.getProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL) == null) { + LOG.info(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL + " not set"); + System.setProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL); + } + if (System.getProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL) == null) { + LOG.info(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL + " not set"); + 
System.setProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL); + } + + LOG.info("{} is set to {}, {} is set to {}", + OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH, OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH); + this.originalConfig = super.getPluginJobConf(); + ObHbaseReaderUtil.doPretreatment(originalConfig); + List conns = originalConfig.getList(Constant.CONN_MARK, Object.class); + // 逻辑表配置 + Preconditions.checkArgument(CollectionUtils.isNotEmpty(conns), "connection information is empty."); + dealLogicConnAndTable(conns); + if (LOG.isDebugEnabled()) { + LOG.debug("After init(), now originalConfig is:\n{}\n", this.originalConfig); + } + } + + @Override + public void destroy() { + } + + private void dealLogicConnAndTable(List conns) { + String unifiedUsername = originalConfig.getString(Key.USERNAME); + String unifiedPassword = originalConfig.getString(Key.PASSWORD); + boolean useSqlReader = originalConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER); + boolean checkSlave = originalConfig.getBool(com.alibaba.datax.plugin.rdbms.reader.Key.CHECK_SLAVE, false); + Set keywords = Arrays.stream(com.alibaba.datax.plugin.reader.obhbasereader.Constant.OBMYSQL_KEYWORDS.split(",")).collect(Collectors.toSet()); + List preSql = originalConfig.getList(com.alibaba.datax.plugin.rdbms.reader.Key.PRE_SQL, String.class); + + int tableNum = 0; + + for (int i = 0, len = conns.size(); i < len; i++) { + Configuration connConf = Configuration.from(conns.get(i).toString()); + String curUsername = connConf.getString(Key.USERNAME, unifiedUsername); + Preconditions.checkArgument(StringUtils.isNotEmpty(curUsername), "username is empty."); + String curPassword = connConf.getString(Key.PASSWORD, unifiedPassword); + + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.USERNAME), curUsername); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.PASSWORD), curPassword); 
+ + List jdbcUrls = connConf.getList(Key.JDBC_URL, new ArrayList<>(), String.class); + String jdbcUrl; + if (useSqlReader) { + // sql模式下,jdbcUrl必须配置,只有使用sql模式的情况才检查地址 + Preconditions.checkArgument(CollectionUtils.isNotEmpty(jdbcUrls), "if using sql mode, jdbcUrl is needed"); + jdbcUrl = DBUtil.chooseJdbcUrlWithoutRetry(DataBaseType.MySql, jdbcUrls, curUsername, curPassword, preSql, checkSlave); + jdbcUrl = DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl); + // 回写到connection[i].jdbcUrl + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.JDBC_URL), jdbcUrl); + LOG.info("Available jdbcUrl:{}.", jdbcUrl); + } else { + jdbcUrl = jdbcUrls.get(0); + jdbcUrl = StringUtils.isNotBlank(jdbcUrl) ? DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl) : EMPTY; + checkAndSetHbaseConnConf(jdbcUrl, curUsername, curPassword, connConf, i); + } + + // table 方式 + // 对每一个connection 上配置的table 项进行解析(已对表名称进行了 ` 处理的) + List tables = connConf.getList(Key.TABLE, String.class); + + List expandedTables = TableExpandUtil.expandTableConf(DataBaseType.MySql, tables); + + if (expandedTables.isEmpty()) { + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, "The specified table list is empty."); + } + + for (int ti = 0; ti < expandedTables.size(); ti++) { + String tableName = expandedTables.get(ti); + if (keywords.contains(tableName.toUpperCase())) { + expandedTables.set(ti, "`" + tableName + "`"); + } + } + tableNum += expandedTables.size(); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE), expandedTables); + } + + if (tableNum == 0) { + // 分库分表读,未匹配到可以抽取的表 + LOG.error("sharding rule result is empty."); + throw DataXException.asDataXException("No tables were matched"); + } + originalConfig.set(Constant.TABLE_NUMBER_MARK, tableNum); + } + + /** + * In public cloud, only odp mode can be used. + * In private cloud, both odp mode and ocp mode can be used. 
+ * + * @param jdbcUrl + * @param curUsername + * @param curPassword + * @param connConf + */ + private void checkAndSetHbaseConnConf(String jdbcUrl, String curUsername, String curPassword, Configuration connConf, int curIndex) { + ServerConnectInfo serverConnectInfo = new ServerConnectInfo(jdbcUrl, curUsername, curPassword); + if (!originalConfig.getBool(Key.USE_ODP_MODE, false)) { + // Normally, only need to query at first time + // In ocp mode, dbName, configUrl, sysUser and sysPass are needed. + String sysUser = connConf.getString(Key.OB_SYS_USERNAME, originalConfig.getString(Key.OB_SYS_USERNAME)); + String sysPass = connConf.getString(Key.OB_SYS_PASSWORD, originalConfig.getString(Key.OB_SYS_PASSWORD)); + serverConnectInfo.setSysUser(sysUser); + serverConnectInfo.setSysPass(sysPass); + String configUrl = connConf.getString(Key.CONFIG_URL, originalConfig.getString(Key.CONFIG_URL)); + if (StringUtils.isBlank(configUrl)) { + configUrl = queryRsUrl(serverConnectInfo); + } + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.USERNAME), curUsername); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.OB_SYS_USERNAME), serverConnectInfo.sysUser); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.OB_SYS_PASSWORD), serverConnectInfo.sysPass); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.CONFIG_URL), configUrl); + } else { + // In odp mode, dbName, odp host and odp port are needed. 
+ String odpHost = connConf.getString(Key.ODP_HOST, serverConnectInfo.host); + String odpPort = connConf.getString(Key.ODP_PORT, serverConnectInfo.port); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.ODP_HOST), odpHost); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.ODP_PORT), odpPort); + } + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.DB_NAME), serverConnectInfo.databaseName); + } + + private String queryRsUrl(ServerConnectInfo serverInfo) { + Preconditions.checkArgument(checkVersionAfterV3(serverInfo.jdbcUrl, serverInfo.getFullUserName(), serverInfo.password), "ob before 3.x is not supported."); + String configUrl = originalConfig.getString(Key.CONFIG_URL, null); + if (configUrl == null) { + try { + Connection conn = null; + int retry = 0; + final String sysJDBCUrl = serverInfo.jdbcUrl.replace(serverInfo.databaseName, "oceanbase"); + do { + try { + if (retry > 0) { + int sleep = retry > 9 ? 
500 : 1 << retry; + try { + TimeUnit.SECONDS.sleep(sleep); + } catch (InterruptedException e) { + } + LOG.warn("retry fetch RsUrl the {} times", retry); + } + conn = DBUtil.getConnection(DataBaseType.OceanBase, sysJDBCUrl, serverInfo.sysUser, serverInfo.sysPass); + String sql = "show parameters like 'obconfig_url'"; + LOG.info("query param: {}", sql); + PreparedStatement stmt = conn.prepareStatement(sql); + ResultSet result = stmt.executeQuery(); + if (result.next()) { + configUrl = result.getString("Value"); + } + if (StringUtils.isNotBlank(configUrl)) { + break; + } + } catch (Exception e) { + ++retry; + LOG.warn("fetch root server list(rsList) error {}", e.getMessage()); + } finally { + DBUtil.closeDBResources(null, conn); + } + } while (retry < 3); + + LOG.info("configure url is: " + configUrl); + originalConfig.set(Key.CONFIG_URL, configUrl); + } catch (Exception e) { + LOG.error("Fail to get configure url: {}", e.getMessage(), e); + throw DataXException.asDataXException(HbaseReaderErrorCode.REQUIRED_VALUE, "未配置obConfigUrl,且无法获取obConfigUrl"); + } + } + return configUrl; + } + + @Override + public void prepare() { + } + + @Override + public void post() { + } + + @Override + public List split(int adviceNumber) { + Map hbaseColumnCells = ObHbaseReaderUtil.parseColumn(originalConfig.getList(Key.COLUMN, Map.class)); + if (hbaseColumnCells.size() == 0) { + LOG.error("no column cells specified."); + throw new RuntimeException("no column cells specified"); + } + String columnFamily = ObHbaseReaderUtil.parseColumnFamily(hbaseColumnCells.values()); + Preconditions.checkArgument(StringUtils.isNotEmpty(columnFamily), "column family is empty."); + List conns = originalConfig.getList(Constant.CONN_MARK, Object.class); + Preconditions.checkArgument(conns != null && !conns.isEmpty(), "connection information is necessary."); + return splitLogicTables(adviceNumber, conns, columnFamily); + } + + private List splitLogicTables(int adviceNumber, List conns, String columnFamily) { + 
// adviceNumber这里是channel数量大小, 即datax并发task数量 + // eachTableShouldSplittedNumber是单表应该切分的份数 + int eachTableShouldSplittedNumber = (int) Math.ceil(1.0 * adviceNumber / originalConfig.getInt(Constant.TABLE_NUMBER_MARK)); + boolean useSqlReader = originalConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER); + boolean odpMode = originalConfig.getBool(Key.USE_ODP_MODE, DEFAULT_USE_ODPMODE); + boolean readByPartition = originalConfig.getBool(Key.READ_BY_PARTITION, false); + List splittedConfigs = new ArrayList<>(); + + for (int i = 0, len = conns.size(); i < len; i++) { + Configuration sliceConfig = originalConfig.clone(); + Configuration connConf = Configuration.from(conns.get(i).toString()); + copyConnConfByMode(useSqlReader, odpMode, sliceConfig, connConf); + // 说明是配置的 table 方式 + // 已在之前进行了扩展和`处理,可以直接使用 + List tables = connConf.getList(Key.TABLE, String.class); + Validate.isTrue(null != tables && !tables.isEmpty(), "error in your configuration for the reading database table."); + int tempEachTableShouldSplittedNumber = eachTableShouldSplittedNumber; + if (tables.size() == 1) { + Integer splitFactor = originalConfig.getInt(com.alibaba.datax.plugin.rdbms.reader.Key.SPLIT_FACTOR, Constant.SPLIT_FACTOR); + tempEachTableShouldSplittedNumber = eachTableShouldSplittedNumber * splitFactor; + } + for (String table : tables) { + Configuration tempSlice; + tempSlice = sliceConfig.clone(); + tempSlice.set(Key.TABLE, table); + splittedConfigs.addAll( + useSqlReader ? 
SqlReaderSplitUtil.splitSingleTable(tempSlice, table, columnFamily, tempEachTableShouldSplittedNumber, readByPartition) : HbaseSplitUtil.split(tempSlice)); + } + } + return splittedConfigs; + } + + private void copyConnConfByMode(boolean useSqlReader, boolean odpMode, Configuration targetConf, Configuration sourceConnConf) { + String username = sourceConnConf.getNecessaryValue(Key.USERNAME, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.USERNAME, username); + String password = sourceConnConf.getNecessaryValue(Key.PASSWORD, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.PASSWORD, password); + + if (useSqlReader) { + String jdbcUrl = sourceConnConf.getNecessaryValue(Key.JDBC_URL, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.JDBC_URL, jdbcUrl); + } else if (odpMode) { + String dbName = sourceConnConf.getNecessaryValue(Key.DB_NAME, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.DB_NAME, dbName); + String odpHost = sourceConnConf.getNecessaryValue(Key.ODP_HOST, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.ODP_HOST, odpHost); + String odpPort = sourceConnConf.getNecessaryValue(Key.ODP_PORT, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.ODP_PORT, odpPort); + } else { + String dbName = sourceConnConf.getNecessaryValue(Key.DB_NAME, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.DB_NAME, dbName); + String sysUser = sourceConnConf.getNecessaryValue(Key.OB_SYS_USERNAME, DBUtilErrorCode.REQUIRED_VALUE); + targetConf.set(Key.OB_SYS_USERNAME, sysUser); + String sysPass = sourceConnConf.getString(Key.OB_SYS_PASSWORD); + targetConf.set(Key.OB_SYS_PASSWORD, sysPass); + } + targetConf.remove(Constant.CONN_MARK); + } + + private boolean checkVersionAfterV3(String jdbcUrl, String username, String password) { + int retryLimit = 3; + int retryCount = 0; + Connection conn = null; + while (retryCount++ <= retryLimit) { + try { + conn = DBUtil.getConnectionWithoutRetry(DataBaseType.MySql, jdbcUrl, username, password); + ObVersion 
obVersion = ObReaderUtils.getObVersion(conn); + return ObVersion.V3.compareTo(obVersion) <= 0; + } catch (Exception e) { + LOG.error("fail to check ob version, will retry: " + e.getMessage()); + if (e.getMessage().contains(ACCESS_DENIED_ERROR)) { + throw new RuntimeException(e); + } + try { + TimeUnit.SECONDS.sleep(1); + } catch (Exception ex) { + LOG.error("interrupted while waiting for retry."); + } + } finally { + DBUtil.closeDBResources(null, conn); + } + } + return false; + } + } + + public static class Task extends Reader.Task { + private static Logger LOG = LoggerFactory.getLogger(Task.class); + private Configuration taskConfig; + private AbstractHbaseTask hbaseTaskProxy; + + @Override + public void init() { + this.taskConfig = super.getPluginJobConf(); + + String mode = this.taskConfig.getString(Key.MODE); + ModeType modeType = ModeType.getByTypeName(mode); + boolean useSqlReader = this.taskConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER); + LOG.info("init reader with mode: " + modeType); + + switch (modeType) { + case Normal: + this.hbaseTaskProxy = useSqlReader ? 
new SQLNormalModeReader(this.taskConfig) : new ScanNormalModeReader(this.taskConfig); + break; + case MultiVersionFixedColumn: + this.hbaseTaskProxy = new ScanMultiVersionReader(this.taskConfig); + break; + default: + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "This type of mode is not supported by hbasereader:" + modeType); + } + } + + @Override + public void destroy() { + if (this.hbaseTaskProxy != null) { + try { + this.hbaseTaskProxy.close(); + } catch (Exception e) { + // + } + } + } + + @Override + public void prepare() { + try { + this.hbaseTaskProxy.prepare(); + } catch (Exception e) { + throw DataXException.asDataXException(HbaseReaderErrorCode.PREPAR_READ_ERROR, e); + } + } + + @Override + public void post() { + super.post(); + } + + @Override + public void startRead(RecordSender recordSender) { + Record record = recordSender.createRecord(); + boolean fetchOK; + int retryTimes = 0; + int maxRetryTimes = 3; + while (true) { + try { + // TODO check exception + fetchOK = this.hbaseTaskProxy.fetchLine(record); + } catch (Exception e) { + LOG.info("fetch record failed. 
reason: {}.", e.getMessage(), e); + super.getTaskPluginCollector().collectDirtyRecord(record, e); + if (retryTimes++ > maxRetryTimes) { + throw DataXException.asDataXException(HbaseReaderErrorCode.READ_ERROR, "read from obhbase failed", e); + } + record = recordSender.createRecord(); + continue; + } + if (fetchOK) { + recordSender.sendToWriter(record); + record = recordSender.createRecord(); + } else { + break; + } + } + recordSender.flush(); + } + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ColumnType.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ColumnType.java new file mode 100755 index 0000000000..ca4d73a73f --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ColumnType.java @@ -0,0 +1,44 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.enums; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; + +import java.util.Arrays; + +/** + * 只对 normal 模式读取时有用,多版本读取时,不存在列类型的 + */ +public enum ColumnType { + STRING("string"), + BINARY_STRING("binarystring"), + BYTES("bytes"), + BOOLEAN("boolean"), + SHORT("short"), + INT("int"), + LONG("long"), + FLOAT("float"), + DOUBLE("double"), + DATE("date"); + + private String typeName; + + ColumnType(String typeName) { + this.typeName = typeName; + } + + public static ColumnType getByTypeName(String typeName) { + for (ColumnType columnType : values()) { + if (columnType.typeName.equalsIgnoreCase(typeName)) { + return columnType; + } + } + + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, + String.format("The type %s is not supported by hbasereader, currently supported type is:%s .", typeName, Arrays.asList(values()))); + } + + @Override + public String toString() { + return this.typeName; + } +} diff --git 
a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/FetchVersion.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/FetchVersion.java new file mode 100644 index 0000000000..2bf273c8d1 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/FetchVersion.java @@ -0,0 +1,28 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.enums; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; + +import java.util.Arrays; +import java.util.Optional; +import java.util.stream.Stream; + +public enum FetchVersion { + + OLDEST("oldest"), LATEST("latest"); + + private final String version; + + FetchVersion(String version) { + this.version = version; + } + + public static FetchVersion getByDesc(String name) { + Optional<FetchVersion> result = Stream.of(values()).filter(v -> v.version.equalsIgnoreCase(name)) + .findFirst(); + return result.orElseThrow(() -> { + return DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, + String.format("obHBasereader 不支持该类型:%s, 目前支持的类型是:%s", name, Arrays.asList(values()))); + }); + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ModeType.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ModeType.java new file mode 100644 index 0000000000..ccaf879632 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/enums/ModeType.java @@ -0,0 +1,30 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.enums; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; + +import java.util.Arrays; + +public enum ModeType { + Normal("normal"), + MultiVersionFixedColumn("multiVersionFixedColumn"), + MultiVersionDynamicColumn("multiVersionDynamicColumn"), + ; + + private 
package com.alibaba.datax.plugin.reader.obhbasereader.ext;

import com.google.common.base.Preconditions;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.apache.commons.lang3.StringUtils.EMPTY;

/**
 * Parsed OceanBase connection information: cluster, tenant, user, password,
 * host/port and database name, extracted from the jdbc url and the user name.
 * <p>
 * All fields are intentionally public — callers in this plugin read them directly.
 */
public class ServerConnectInfo {

    public String clusterName;
    public String tenantName;
    // userName never contains the tenant or cluster part
    public String userName;
    public String password;
    public String databaseName;
    public String ipPort;
    public String jdbcUrl;
    public String host;
    public String port;
    public boolean publicCloud;
    public int rpcPort;
    public String sysUser;
    public String sysPass;

    /**
     * @param jdbcUrl  format is jdbc:oceanbase//ip:port, or the OB10 form
     *                 {@code ||_dsc_ob10_dsc_||cluster:tenant||real-jdbc-url}
     * @param username format is cluster:tenant:username or username@tenant#cluster or user@tenant or user
     * @param password plain password
     */
    public ServerConnectInfo(final String jdbcUrl, final String username, final String password) {
        this(jdbcUrl, username, password, null, null);
    }

    public ServerConnectInfo(final String jdbcUrl, final String username, final String password,
                             final String sysUser, final String sysPass) {
        String urlCluster = null;
        String urlTenant = null;
        if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) {
            String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN);
            Preconditions.checkArgument(ss.length == 3, "jdbc url format is not correct:" + jdbcUrl);
            String[] clusterAndTenant = ss[1].trim().split(":");
            // BUGFIX: the original indexed [0] and [1] unconditionally, throwing
            // ArrayIndexOutOfBoundsException when the url segment had no ':'.
            Preconditions.checkArgument(clusterAndTenant.length == 2,
                    "cluster:tenant segment is not correct:" + jdbcUrl);
            urlCluster = clusterAndTenant[0];
            urlTenant = clusterAndTenant[1];
            this.jdbcUrl = ss[2];
        } else {
            this.jdbcUrl = jdbcUrl;
        }
        this.password = password;
        this.sysUser = sysUser;
        this.sysPass = sysPass;
        parseJdbcUrl(jdbcUrl);
        parseFullUserName(username);
        // BUGFIX: parseFullUserName() used to clobber the cluster/tenant parsed out of the
        // OB10-style url whenever the username carried none; restore them here.
        if (urlTenant != null && EMPTY.equals(this.tenantName)) {
            this.clusterName = urlCluster;
            this.tenantName = urlTenant;
        }
    }

    /**
     * Extracts host, port and database name out of the jdbc url.
     * The regex requires a trailing '?' after the database name, i.e. the url must
     * carry at least one connection property.
     */
    private void parseJdbcUrl(final String jdbcUrl) {
        Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w-]+)\\?");
        Matcher matcher = pattern.matcher(jdbcUrl);
        if (matcher.find()) {
            String ipPort = matcher.group(1);
            String dbName = matcher.group(2);
            this.ipPort = ipPort;
            String[] hostPort = ipPort.split(":");
            this.host = hostPort[0];
            this.port = hostPort[1];
            this.databaseName = dbName;
            // aliyuncs.com hosts are the public-cloud endpoints
            this.publicCloud = host.endsWith("aliyuncs.com");
        } else {
            throw new RuntimeException("Invalid argument:" + jdbcUrl);
        }
    }

    /**
     * Splits a full user name into user / tenant / cluster.
     * Supported layouts: "cluster:tenant:user", "user@tenant#cluster", "user@tenant", "user".
     */
    private void parseFullUserName(final String fullUserName) {
        int tenantIndex = fullUserName.indexOf("@");
        int clusterIndex = fullUserName.indexOf("#");
        if (fullUserName.contains(":") && tenantIndex < 0) {
            // colon layout, used together with the ||_dsc_ob10_dsc_ style jdbc url
            String[] names = fullUserName.split(":");
            if (names.length != 3) {
                throw new RuntimeException("invalid argument: " + fullUserName);
            }
            this.clusterName = names[0];
            this.tenantName = names[1];
            this.userName = names[2];
        } else if (tenantIndex < 0) {
            // short jdbc url and a bare user name (typically public cloud; no partition calc)
            this.userName = fullUserName;
            this.clusterName = EMPTY;
            this.tenantName = EMPTY;
        } else {
            // short jdbc url, user name carries the tenant (and maybe the cluster)
            this.userName = fullUserName.substring(0, tenantIndex);
            if (clusterIndex < 0) {
                this.clusterName = EMPTY;
                this.tenantName = fullUserName.substring(tenantIndex + 1);
            } else {
                this.clusterName = fullUserName.substring(clusterIndex + 1);
                this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex);
            }
        }
    }

    @Override
    public String toString() {
        return "ServerConnectInfo{" +
                "clusterName='" + clusterName + '\'' +
                ", tenantName='" + tenantName + '\'' +
                ", userName='" + userName + '\'' +
                ", password='" + password + '\'' +
                ", databaseName='" + databaseName + '\'' +
                ", ipPort='" + ipPort + '\'' +
                ", jdbcUrl='" + jdbcUrl + '\'' +
                ", publicCloud=" + publicCloud +
                ", rpcPort=" + rpcPort +
                '}';
    }

    /**
     * Rebuilds the user@tenant#cluster login name. On public cloud (or when an rpc port
     * is set and no cluster is known) only the bare user name is used.
     */
    public String getFullUserName() {
        StringBuilder builder = new StringBuilder();
        builder.append(userName);
        if (publicCloud || (rpcPort != 0 && EMPTY.equals(clusterName))) {
            return builder.toString();
        }
        if (!EMPTY.equals(tenantName)) {
            builder.append("@").append(tenantName);
        }
        if (!EMPTY.equals(clusterName)) {
            builder.append("#").append(clusterName);
        }
        // note: when both tenant and cluster are empty the builder already equals
        // userName, so the original extra "return this.userName" branch was dead code.
        return builder.toString();
    }

    public void setRpcPort(int rpcPort) {
        this.rpcPort = rpcPort;
    }

    public void setSysUser(String sysUser) {
        this.sysUser = sysUser;
    }

    public void setSysPass(String sysPass) {
        this.sysPass = sysPass;
    }
}
+import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.obhbasereader.Constant; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell; +import com.alibaba.datax.plugin.reader.obhbasereader.Key; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public abstract class AbstractHbaseTask { + protected String encoding; + protected String timezone = null; + protected Map hbaseColumnCellMap; + // 常量字段 + protected Map constantMap; + protected ModeType modeType; + + public AbstractHbaseTask() { + } + + public AbstractHbaseTask(Configuration configuration) { + this.timezone = configuration.getString(Key.TIMEZONE, Constant.DEFAULT_TIMEZONE); + this.encoding = configuration.getString(Key.ENCODING, Constant.DEFAULT_ENCODING); + String mode = configuration.getString(Key.MODE, "Normal"); + this.modeType = ModeType.getByTypeName(mode); + this.constantMap = new HashMap<>(); + this.hbaseColumnCellMap = ObHbaseReaderUtil.parseColumn(configuration.getList(Key.COLUMN, Map.class), constantMap, encoding, timezone); + } + + public abstract void prepare() throws Exception; + + public abstract boolean fetchLine(Record record) throws Exception; + + public abstract void close() throws IOException; +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/AbstractScanReader.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/AbstractScanReader.java new file mode 100755 index 0000000000..8d1e8ce364 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/task/AbstractScanReader.java @@ -0,0 +1,99 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.task; + +import com.alibaba.datax.common.util.Configuration; +import 
package com.alibaba.datax.plugin.reader.obhbasereader.task;

import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
import com.alibaba.datax.plugin.reader.obhbasereader.HTableManager;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;

import com.alipay.oceanbase.hbase.OHTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * Base class for readers that stream rows through an hbase {@link Scan} against
 * an OceanBase {@link OHTable}, within a [startKey, endKey) rowkey slice.
 */
public abstract class AbstractScanReader extends AbstractHbaseTask {
    private static final Logger LOG = LoggerFactory.getLogger(AbstractScanReader.class);
    protected OHTable ohtable;
    // last successfully fetched row, used to resume the scan after a transient failure
    protected Result lastResult = null;
    protected Scan scan;
    protected ResultScanner resultScanner;
    protected int maxVersion;
    private int scanCache;
    private byte[] startKey = null;
    private byte[] endKey = null;

    public AbstractScanReader(Configuration configuration) {
        super(configuration);
        this.maxVersion = configuration.getInt(Key.MAX_VERSION, 1);
        this.scanCache = configuration.getInt(Key.SCAN_CACHE, Constant.DEFAULT_SCAN_CACHE);
        this.ohtable = ObHbaseReaderUtil.initOHtable(configuration);
        this.startKey = ObHbaseReaderUtil.convertInnerStartRowkey(configuration);
        this.endKey = ObHbaseReaderUtil.convertInnerEndRowkey(configuration);
        LOG.info("The task set startRowkey=[{}], endRowkey=[{}].", Bytes.toStringBinary(this.startKey), Bytes.toStringBinary(this.endKey));
    }

    /** Builds the Scan (range, caching, versions, columns) and opens the scanner. */
    @Override
    public void prepare() throws Exception {
        this.scan = new Scan();
        this.scan.setSmall(false);
        this.scan.setCacheBlocks(false);
        this.scan.setStartRow(startKey);
        this.scan.setStopRow(endKey);
        this.scan.setCaching(this.scanCache);
        // -1 and MAX_VALUE both mean "all versions"
        if (this.maxVersion == -1 || this.maxVersion == Integer.MAX_VALUE) {
            this.scan.setMaxVersions();
        } else {
            this.scan.setMaxVersions(this.maxVersion);
        }
        initScanColumns();
        this.resultScanner = this.ohtable.getScanner(this.scan);
    }

    @Override
    public void close() throws IOException {
        if (this.resultScanner != null) {
            this.resultScanner.close();
        }
        HTableManager.closeHTable(this.ohtable);
    }

    /** Registers every configured non-constant, non-rowkey cell on the Scan. */
    protected void initScanColumns() {
        boolean isConstant;
        boolean isRowkeyColumn;
        for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) {
            isConstant = cell.isConstant();
            isRowkeyColumn = ObHbaseReaderUtil.isRowkeyColumn(cell.getColumnName());
            if (!isConstant && !isRowkeyColumn) {
                LOG.info("columnFamily: " + new String(cell.getCf()) + ", qualifier: " + new String(cell.getQualifier()));
                this.scan.addColumn(cell.getCf(), cell.getQualifier());
            }
        }
    }

    /**
     * Fetches the next hbase row; on a transient scanner failure, re-opens the
     * scanner from the last successful row and skips the duplicate.
     *
     * @return the next Result, or null when the scan is exhausted
     */
    protected Result getNextHbaseRow() throws Exception {
        Result result;
        try {
            result = resultScanner.next();
        } catch (Exception e) {
            LOG.error("failed to get result", e);
            if (lastResult != null) {
                // resume from the last row we handed out (it will be re-read once)
                scan.setStartRow(lastResult.getRow());
            }
            resultScanner = this.ohtable.getScanner(scan);
            result = resultScanner.next();
            // BUGFIX: guard result != null — the re-opened scanner may already be
            // exhausted, and the original dereferenced result.getRow() unconditionally.
            if (result != null && lastResult != null && Bytes.equals(lastResult.getRow(), result.getRow())) {
                // skip the row we already emitted before the failure
                result = resultScanner.next();
            }
        }
        lastResult = result;
        // may be null
        return result;
    }
}
package com.alibaba.datax.plugin.reader.obhbasereader.task;

import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_READ_HINT;

import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
import com.alibaba.datax.plugin.reader.obhbasereader.enums.FetchVersion;
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;

/**
 * Reads an OceanBase hbase-model table ("table$cf") through plain SQL over the
 * K/Q/T/V columns, paging with "(K,Q,T) > (savepoint)" so the task can resume
 * after transient failures.
 */
public class SQLNormalModeReader extends AbstractHbaseTask {
    private final static String QUERY_SQL_TEMPLATE = "select %s K, Q, T, V, hex(K) as `hex` from %s %s";
    private static final Logger LOG = LoggerFactory.getLogger(SQLNormalModeReader.class);
    // latest values per qualifier for the rowkey currently being assembled
    private final Map columnMap;
    // chosen version (T) per qualifier for the current rowkey
    private final Map versionMap;
    private final FetchVersion fetchVersion;
    private Set columnNames;
    private boolean noMoreData = false;
    private String querySQL = null;
    private Connection conn = null;
    private PreparedStatement stmt = null;
    private ResultSet rs = null;
    private String jdbcUrl = null;
    private String columnFamily = null;
    private String username = null;
    private String password = null;
    private int fetchSize = com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE;
    private long readBatchSize = com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_READ_BATCH_SIZE;
    private Configuration configuration;
    private boolean hasRange = false;
    // resume point: [hex(K), Q, T] of the last row consumed
    private String[] savepoint = new String[3];
    // only used by unit test
    protected boolean reuseConn = false;

    public SQLNormalModeReader(Configuration configuration) {
        this.configuration = configuration;
        this.hbaseColumnCellMap = ObHbaseReaderUtil.parseColumn(configuration.getList(Key.COLUMN, Map.class));
        if (hbaseColumnCellMap.size() == 0) {
            LOG.error("no column cells specified.");
            throw new RuntimeException("no column cells specified");
        }
        columnFamily = ObHbaseReaderUtil.parseColumnFamily(hbaseColumnCellMap.values());
        this.columnNames =
                hbaseColumnCellMap.keySet().stream().map(e -> ObHbaseReaderUtil.isRowkeyColumn(e) ?
                        Constant.ROWKEY_FLAG : e.substring((columnFamily + ":").length())).collect(Collectors.toSet());

        String partInfo = "";
        String partName = configuration.getString(Key.PARTITION_NAME, null);
        if (partName != null) {
            partInfo = "partition(" + partName + ")";
        }

        String tableName = configuration.getString(Key.TABLE, null);
        String hint = configuration.getString(Key.READER_HINT, OB_READ_HINT);
        this.hasRange = !StringUtils.isEmpty(configuration.getString(Key.RANGE, null));
        this.querySQL = String.format(QUERY_SQL_TEMPLATE, hint, tableName + "$" + columnFamily, partInfo);
        if (hasRange) {
            this.querySQL = querySQL + " where (" + configuration.getString(Key.RANGE) + ")";
        }
        this.jdbcUrl = configuration.getString(Key.JDBC_URL, null);
        this.username = configuration.getString(Key.USERNAME, null);
        this.password = configuration.getString(Key.PASSWORD, null);
        this.columnMap = Maps.newHashMap();
        this.versionMap = Maps.newHashMap();
        this.fetchVersion = FetchVersion.getByDesc(configuration.getString("version", FetchVersion.LATEST.name()));
        this.timezone = configuration.getString(Key.TIMEZONE, "UTC");
        this.encoding = configuration.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
        this.fetchSize = configuration.getInt(Key.FETCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE);
        this.readBatchSize = configuration.getLong(Key.READ_BATCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_READ_BATCH_SIZE);
        LOG.info("read from jdbcUrl {} with fetchSize {}, readBatchSize {}", jdbcUrl, fetchSize, readBatchSize);
    }

    /**
     * Advances the cursor; returns true while the cursor still sits on the same
     * rowkey as {@code currentKey} (i.e. the current logical row is not finished).
     */
    private boolean notFinished(String currentKey) throws SQLException {
        boolean updateSuccess = updateResultSet();
        if (updateSuccess) {
            String newKey = rs.getString("K");
            return newKey.equals(currentKey);
        } else {
            noMoreData = true;
            Arrays.fill(savepoint, null);
            return false;
        }
    }

    /**
     * Moves to the next row, issuing the next "(K,Q,T) > savepoint" batch query
     * when the current batch is drained. Returns false when all data is read.
     */
    private boolean updateResultSet() throws SQLException {
        if (rs != null && rs.next()) {
            return true;
        }
        if (savepoint[0] != null) {
            int retryLimit = 10;
            int retryCount = 0;
            String tempQuery = querySQL + (hasRange ? " and " : " where ") + "(K,Q,T) > (unhex(?),?,?) order by K,Q,T limit " + readBatchSize;
            while (retryCount < retryLimit) {
                retryCount++;
                try {
                    resetConnection();
                    DBUtil.closeDBResources(rs, stmt, null);
                    stmt = conn.prepareStatement(tempQuery, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
                    stmt.setFetchSize(fetchSize);
                    for (int i = 0; i < savepoint.length; i++) {
                        stmt.setObject(i + 1, savepoint[i]);
                    }
                    rs = stmt.executeQuery();
                    if (rs.next()) {
                        LOG.info("execute sql: {}, savepoint:[{}]", tempQuery, Arrays.stream(savepoint).map(e -> "'" + e + "'").collect(Collectors.joining(",")));
                        return true;
                    }
                    // All data in this task are read
                    break;
                } catch (Exception ex) {
                    LOG.error("failed to query sql, will retry {} times", retryCount, ex);
                    DBUtil.closeDBResources(rs, stmt, conn);
                    // BUGFIX: was `retryCount > retryLimit`, which can never hold inside a
                    // `while (retryCount < retryLimit)` loop — exhausted retries fell through
                    // and were silently reported as end-of-data, dropping the remaining rows.
                    if (retryCount >= retryLimit) {
                        LOG.error("Sql: [{}] executed failed, savepoint:[{}], reason: {}", tempQuery, Arrays.stream(savepoint).map(e -> "'" + e + "'").collect(Collectors.joining(",")),
                                ex.getMessage());
                        throw new RuntimeException(ex);
                    }
                }
            }
        }
        return false;
    }

    /** Issues the first batch query (no savepoint yet), with retries. */
    @Override
    public void prepare() {
        int retryLimit = 10;
        int retryCount = 0;
        while (true) {
            retryCount++;
            try {
                resetConnection();
                String tempQuery = querySQL + " order by K,Q,T limit " + readBatchSize;
                stmt = conn.prepareStatement(tempQuery, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
                stmt.setFetchSize(fetchSize);
                LOG.info("execute sql : {}", tempQuery);
                rs = stmt.executeQuery();
                if (!rs.next()) {
                    noMoreData = true;
                }
                break;
            } catch (Exception e) {
                LOG.error("failed to query sql, will retry {} times", retryCount, e);
                DBUtil.closeDBResources(rs, stmt, conn);
                // consistent with updateResultSet(): give up after retryLimit attempts
                if (retryCount >= retryLimit) {
                    LOG.error("Sql: [{}] executed failed, reason: {}", querySQL, e.getMessage());
                    throw new RuntimeException(e);
                }
            }
        }
    }

    /**
     * Assembles one logical hbase row (all qualifiers of one K) into a record.
     * Picks the version per qualifier according to {@link FetchVersion}.
     * NOTE(review): the version predicates assume rows ordered by T ascending and
     * that T encodes the (presumably negated) timestamp — confirm against the
     * OceanBase K/Q/T/V storage model before changing them.
     */
    @Override
    public boolean fetchLine(Record record) throws Exception {
        try {
            if (noMoreData) {
                return false;
            }
            String currentKey = rs.getString("K");
            savepoint[0] = rs.getString("hex");
            // BUGFIX: use the configured encoding instead of the platform default charset,
            // so the rowkey bytes round-trip consistently with buildColumn() below.
            columnMap.put(Constant.ROWKEY_FLAG, currentKey.getBytes(encoding));
            do {
                String columnName = rs.getString("Q");
                savepoint[1] = columnName;
                if (!this.columnNames.contains(columnName)) {
                    continue;
                }
                Long version = rs.getLong("T");
                savepoint[2] = String.valueOf(version);
                byte[] value = rs.getBytes("V");
                Predicate<Long> predicate;
                switch (this.fetchVersion) {
                    case OLDEST:
                        predicate = v -> v.compareTo((Long) versionMap.getOrDefault(columnName, Long.MIN_VALUE)) > 0;
                        break;
                    case LATEST:
                        predicate = v -> v.compareTo((Long) versionMap.getOrDefault(columnName, Long.MAX_VALUE)) < 0;
                        break;
                    default:
                        throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "Not support version: " + this.fetchVersion);
                }

                if (predicate.test(version)) {
                    versionMap.put(columnName, version);
                    columnMap.put(columnName, value);
                }
            } while (notFinished(currentKey));

            for (Object cellObj : this.hbaseColumnCellMap.values()) {
                HbaseColumnCell cell = (HbaseColumnCell) cellObj;
                Column column = null;
                if (cell.isConstant()) {
                    // constant columns were pre-built during configuration parsing
                    column = (Column) this.constantMap.get(cell.getColumnName());
                } else {
                    String columnName = ObHbaseReaderUtil.isRowkeyColumn(cell.getColumnName()) ? Constant.ROWKEY_FLAG : cell.getColumnName().substring((columnFamily + ":").length());
                    byte[] value = null;
                    if (!columnMap.containsKey(columnName)) {
                        LOG.debug("{} is not contained in the record with K value={}. consider this record as null record.", columnName, currentKey);
                    } else {
                        value = (byte[]) columnMap.get(columnName);
                    }
                    column = ObHbaseReaderUtil.buildColumn(value, cell.getColumnType(), encoding, cell.getDateformat(), timezone);
                }
                record.addColumn(column);
            }
        } finally {
            this.columnMap.clear();
            this.versionMap.clear();
        }
        return true;
    }

    @Override
    public void close() throws IOException {
        DBUtil.closeDBResources(rs, stmt, conn);
    }

    /** (Re)opens the connection unless test mode asked us to reuse it. */
    private void resetConnection() throws SQLException {
        if (reuseConn && conn != null && !conn.isClosed()) {
            return;
        }
        // set ob_query_timeout and ob_trx_timeout to a large time in case timeout
        int queryTimeoutSeconds = 60 * 60 * 48;
        String setQueryTimeout = "set ob_query_timeout=" + (queryTimeoutSeconds * 1000 * 1000L);
        String setTrxTimeout = "set ob_trx_timeout=" + ((queryTimeoutSeconds + 5) * 1000 * 1000L);
        List newSessionConfig = Lists.newArrayList(setQueryTimeout, setTrxTimeout);
        List sessionConfig = configuration.getList(Key.SESSION, new ArrayList<>(), String.class);
        newSessionConfig.addAll(sessionConfig);
        configuration.set(Key.SESSION, newSessionConfig);
        conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, this.username, this.password);
    }
}
package com.alibaba.datax.plugin.reader.obhbasereader.task;

import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.LongColumn;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType;
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;

/**
 * Multi-version scan reader: emits one output line per KeyValue (one line per
 * rowkey + qualifier + timestamp + value quadruple).
 */
public class ScanMultiVersionReader extends AbstractScanReader {
    private final static Logger LOG = LoggerFactory.getLogger(ScanMultiVersionReader.class);
    // BUGFIX: this used to be a *static* field assigned in the constructor — shared
    // mutable state across instances, wrong as soon as two readers use different
    // encodings. It is now a per-instance immutable field.
    private final byte[] colonBytes;
    private List kvList = new ArrayList<>();
    private int currentReadPosition = 0;

    // type the rowkey is read out as
    private ColumnType rowkeyReadoutType = null;

    public ScanMultiVersionReader(Configuration configuration) {
        super(configuration);
        HbaseColumnCell rowKey = (HbaseColumnCell) hbaseColumnCellMap.get(Constant.ROWKEY_FLAG);
        if (rowKey != null && rowKey.getColumnType() != null) {
            this.rowkeyReadoutType = rowKey.getColumnType();
        } else {
            this.rowkeyReadoutType = ColumnType.BYTES;
        }
        try {
            this.colonBytes = ":".getBytes(encoding);
        } catch (UnsupportedEncodingException e) {
            throw DataXException.asDataXException(HbaseReaderErrorCode.PREPAR_READ_ERROR, "Failed to get binary of column family and column name colon separator inside the system.", e);
        }
    }

    /** Emits one KeyValue as four columns: rowkey, "cf:qualifier", timestamp, value. */
    private void convertKVToLine(KeyValue keyValue, Record record) throws Exception {
        byte[] rawRowkey = keyValue.getRow();
        long timestamp = keyValue.getTimestamp();
        byte[] cfAndQualifierName = Bytes.add(keyValue.getFamily(), colonBytes, keyValue.getQualifier());

        record.addColumn(convertBytesToAssignType(this.rowkeyReadoutType, rawRowkey));
        record.addColumn(convertBytesToAssignType(ColumnType.STRING, cfAndQualifierName));

        // the user-configured type for the timestamp column is deliberately ignored
        record.addColumn(new LongColumn(timestamp));

        String cfAndQualifierNameStr = Bytes.toString(cfAndQualifierName);
        HbaseColumnCell currentCell = (HbaseColumnCell) hbaseColumnCellMap.get(cfAndQualifierNameStr);
        ColumnType valueReadoutType = currentCell != null ? currentCell.getColumnType() : ColumnType.BYTES;
        String dateFormat = currentCell != null ? currentCell.getDateformat() : null;
        record.addColumn(convertBytesToAssignType(valueReadoutType, keyValue.getValue(), dateFormat));
    }

    private Column convertBytesToAssignType(ColumnType columnType, byte[] byteArray) throws Exception {
        return convertBytesToAssignType(columnType, byteArray, null);
    }

    private Column convertBytesToAssignType(ColumnType columnType, byte[] byteArray, String dateFormat) throws Exception {
        return ObHbaseReaderUtil.buildColumn(byteArray, columnType, encoding, dateFormat, timezone);
    }

    @Override
    public boolean fetchLine(Record record) throws Exception {
        if (this.kvList.size() == this.currentReadPosition) {
            Result result = getNextHbaseRow();
            if (result == null) {
                return false;
            }
            this.kvList = result.list();
            // NOTE(review): an empty Result leaves kvList null here and we report EOF;
            // callers must not call fetchLine() again after a false return.
            if (this.kvList == null) {
                return false;
            }
            this.currentReadPosition = 0;
        }

        try {
            KeyValue keyValue = (KeyValue) this.kvList.get(this.currentReadPosition);
            convertKVToLine(keyValue, record);
        } finally {
            this.currentReadPosition++;
        }
        return true;
    }
}
com.alibaba.datax.plugin.reader.obhbasereader.task; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.element.StringColumn; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType; +import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil; + +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ScanNormalModeReader extends AbstractScanReader { + private static Logger LOG = LoggerFactory.getLogger(ScanNormalModeReader.class); + + public ScanNormalModeReader(Configuration configuration) { + super(configuration); + this.maxVersion = 1; + } + + @Override + public boolean fetchLine(Record record) throws Exception { + Result result = getNextHbaseRow(); + if (null == result) { + return false; + } + try { + byte[] hbaseColumnValue; + String columnName; + ColumnType columnType; + + byte[] cf; + byte[] qualifier; + + for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) { + columnType = cell.getColumnType(); + Column column = null; + if (cell.isConstant()) { + // 对常量字段的处理 + column = constantMap.get(cell.getColumnName()); + } else { + // 根据列名称获取值 + columnName = cell.getColumnName(); + if (ObHbaseReaderUtil.isRowkeyColumn(columnName)) { + hbaseColumnValue = result.getRow(); + } else { + cf = cell.getCf(); + qualifier = cell.getQualifier(); + hbaseColumnValue = result.getValue(cf, qualifier); + } + column = ObHbaseReaderUtil.buildColumn(hbaseColumnValue, columnType, super.encoding, cell.getDateformat(), timezone); + } + record.addColumn(column); + } + } catch (Exception e) { + // 注意,这里catch的异常,期望是byte数组转换失败的情况。而实际上,string的byte数组,转成整数类型是不容易报错的。但是转成double类型容易报错。 + record.setColumn(0, new 
package com.alibaba.datax.plugin.reader.obhbasereader.util;

import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
import com.alibaba.datax.plugin.reader.obhbasereader.Key;

import com.google.common.collect.Lists;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;

/**
 * Splits an obhbasereader job configuration into per-slice configurations,
 * one per user-specified rowkey range.
 */
public final class HbaseSplitUtil {
    private final static Logger LOG = LoggerFactory.getLogger(HbaseSplitUtil.class);

    public static List split(Configuration configuration) {
        final List ranges = configuration.getListConfiguration(Key.RANGE);
        if (CollectionUtils.isEmpty(ranges)) {
            // no ranges configured: the whole job runs as a single slice
            return Lists.newArrayList(configuration);
        }

        // TODO(yuez): once the hbase api can query table regions, intersect the
        // user-specified ranges with the actual region boundaries here.
        List sliceConfs = new ArrayList<>(ranges.size());
        for (Object rangeObj : ranges) {
            Configuration range = (Configuration) rangeObj;
            byte[] sliceStart = convertUserRowkey(range, true);
            byte[] sliceEnd = convertUserRowkey(range, false);
            if (sliceStart.length != 0 && sliceEnd.length != 0 && Bytes.compareTo(sliceStart, sliceEnd) > 0) {
                throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE,
                        "The startRowkey in obhbasereader must not be greater than the endRowkey.");
            }
            Configuration sliceConf = configuration.clone();
            sliceConf.remove(Key.RANGE);
            sliceConf.set(Key.START_ROWKEY, Bytes.toStringBinary(sliceStart));
            sliceConf.set(Key.END_ROWKEY, Bytes.toStringBinary(sliceEnd));
            sliceConfs.add(sliceConf);
        }
        return sliceConfs;
    }

    /**
     * Reads the configured start (or end) rowkey and converts it to bytes,
     * honouring the isBinaryRowkey flag. Blank keys mean "unbounded".
     */
    public static byte[] convertUserRowkey(Configuration configuration, boolean isStart) {
        String keyName = isStart ? Key.START_ROWKEY : Key.END_ROWKEY;
        String rowkey = configuration.getString(keyName);
        if (StringUtils.isBlank(rowkey)) {
            return HConstants.EMPTY_BYTE_ARRAY;
        }
        boolean isBinaryRowkey = configuration.getBool(Key.IS_BINARY_ROWKEY, false);
        return stringToBytes(rowkey, isBinaryRowkey);
    }

    private static byte[] stringToBytes(String rowkey, boolean isBinaryRowkey) {
        return isBinaryRowkey ? Bytes.toBytesBinary(rowkey) : Bytes.toBytes(rowkey);
    }

    /**
     * Intersects the user-specified [startRowkeyByte, endRowkeyByte) range with the
     * table's region boundaries. Only useful once the hbase api can report regions.
     *
     * @param config          job configuration to clone per slice
     * @param startRowkeyByte user start key ("" = unbounded)
     * @param endRowkeyByte   user end key ("" = unbounded)
     * @param regionRanges    pair of (region start keys, region end keys)
     * @return one Configuration per overlapping region
     */
    private static List doSplit(Configuration config, byte[] startRowkeyByte, byte[] endRowkeyByte, Pair regionRanges) {
        List configurations = new ArrayList();
        byte[][] regionStarts = (byte[][]) regionRanges.getFirst();
        byte[][] regionEnds = (byte[][]) regionRanges.getSecond();

        for (int i = 0; i < regionStarts.length; i++) {
            byte[] regionStartKey = regionStarts[i];
            byte[] regionEndKey = regionEnds[i];
            boolean isLastRegion = Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0;

            // Last region: skip it when its start key already lies beyond the user end key.
            // ("" as user end key means unbounded, so the check is disabled then.)
            if (isLastRegion && (endRowkeyByte.length != 0 && (Bytes.compareTo(regionStartKey, endRowkeyByte) > 0))) {
                continue;
            }

            // Non-last region: skip it when the user start key is at or past its end key.
            if (!isLastRegion && (Bytes.compareTo(startRowkeyByte, regionEndKey) >= 0)) {
                continue;
            }

            // Skip regions whose start key is at or past the user end key
            // ("" as user end key again means unbounded).
            if (endRowkeyByte.length != 0 && (Bytes.compareTo(endRowkeyByte, regionStartKey) <= 0)) {
                continue;
            }

            String thisStartKey = getStartKey(startRowkeyByte, regionStartKey);
            String thisEndKey = getEndKey(endRowkeyByte, regionEndKey);
            Configuration p = config.clone();
            p.set(Key.START_ROWKEY, thisStartKey);
            p.set(Key.END_ROWKEY, thisEndKey);
            LOG.debug("startRowkey:[{}], endRowkey:[{}] .", thisStartKey, thisEndKey);
            configurations.add(p);
        }

        return configurations;
    }

    /** Returns min(user end key, region end key), treating "" as unbounded. */
    private static String getEndKey(byte[] endRowkeyByte, byte[] regionEndKey) {
        // earlier validation guarantees a non-null user end key
        if (endRowkeyByte == null) {
            throw new IllegalArgumentException("userEndKey should not be null!");
        }

        byte[] effectiveEnd;
        if (endRowkeyByte.length == 0) {
            effectiveEnd = regionEndKey;
        } else if (Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0) {
            // last region: the user end key is the only bound
            effectiveEnd = endRowkeyByte;
        } else {
            effectiveEnd = Bytes.compareTo(endRowkeyByte, regionEndKey) > 0 ? regionEndKey : endRowkeyByte;
        }
        return Bytes.toStringBinary(effectiveEnd);
    }

    /** Returns max(user start key, region start key). */
    private static String getStartKey(byte[] startRowkeyByte, byte[] regionStarKey) {
        // earlier validation guarantees a non-null user start key
        if (startRowkeyByte == null) {
            throw new IllegalArgumentException("userStartKey should not be null!");
        }

        byte[] effectiveStart = Bytes.compareTo(startRowkeyByte, regionStarKey) < 0 ? regionStarKey : startRowkeyByte;
        return Bytes.toStringBinary(effectiveStart);
    }
}
a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_en_US.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_en_US.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_ja_JP.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_CN.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_HK.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_TW.properties b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/ObHbaseReaderUtil.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/ObHbaseReaderUtil.java new file mode 100755 index 0000000000..4177f1f5d4 --- /dev/null +++ 
b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/ObHbaseReaderUtil.java @@ -0,0 +1,293 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.util; + +import static com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType.MultiVersionFixedColumn; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_DATABASE; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_FULL_USER_NAME; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_ADDR; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_MODE; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_PORT; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PARAM_URL; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PASSWORD; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_PASSWORD; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_USER_NAME; + +import com.alibaba.datax.common.element.BoolColumn; +import com.alibaba.datax.common.element.BytesColumn; +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.DateColumn; +import com.alibaba.datax.common.element.DoubleColumn; +import com.alibaba.datax.common.element.LongColumn; +import com.alibaba.datax.common.element.StringColumn; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.obhbasereader.Constant; +import com.alibaba.datax.plugin.reader.obhbasereader.HTableManager; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell; +import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode; +import com.alibaba.datax.plugin.reader.obhbasereader.Key; +import 
com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType; +import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.TypeReference; + +import com.alipay.oceanbase.hbase.OHTable; +import org.apache.commons.collections.MapUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; +import org.apache.commons.lang3.time.DateUtils; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.charset.Charset; +import java.text.SimpleDateFormat; +import java.util.Collection; +import java.util.Date; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public final class ObHbaseReaderUtil { + private static Logger LOG = LoggerFactory.getLogger(ObHbaseReaderUtil.class); + + public static void doPretreatment(Configuration originalConfig) { + String mode = ObHbaseReaderUtil.dealMode(originalConfig); + originalConfig.set(Key.MODE, mode); + + String encoding = originalConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING); + if (!Charset.isSupported(encoding)) { + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("The encoding you configured is not supported by hbasereader:[%s]", encoding)); + } + originalConfig.set(Key.ENCODING, encoding); + + // 此处增强一个检查:isBinaryRowkey 配置不能出现在与 hbaseConfig 等配置平级地位 + Boolean isBinaryRowkey = originalConfig.getBool(Key.IS_BINARY_ROWKEY); + if (isBinaryRowkey != null) { + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("%s cannot be configured here. 
It should be configured in range.", Key.IS_BINARY_ROWKEY)); + } + } + + /** + * 对模式以及与模式进行配对的配置进行检查 + */ + private static String dealMode(Configuration originalConfig) { + String mode = originalConfig.getString(Key.MODE); + ModeType modeType = ModeType.getByTypeName(mode); + List column = originalConfig.getList(Key.COLUMN, Map.class); + if (column == null || column.isEmpty()) { + throw DataXException.asDataXException(HbaseReaderErrorCode.REQUIRED_VALUE, + "You have configured the normal mode to read the data in HBase, so you must configure the column in the form of:column:[{\"name\": \"cf0:column0\",\"type\": \"string\"}," + + "{\"name\": \"cf1:column1\",\"type\": \"long\"}]"); + } + + // 通过 parse 进行 column 格式的进一步检查 + ObHbaseReaderUtil.parseColumn(column); + if (MultiVersionFixedColumn.equals(modeType)) { + Integer maxVersion = originalConfig.getInt(Key.MAX_VERSION); + Validate.notNull(maxVersion, String.format("You have configured thw mode %s to read the data in HBase, so you must configure: maxVersion", mode)); + + boolean isMaxVersionValid = maxVersion == -1 || maxVersion > 1; + Validate.isTrue(isMaxVersionValid, String.format( + "You have configured the mode %s to read the data in HBase, but the configured maxVersion value is wrong. maxVersion specifies that: - 1 is to read all versions, and cannot be " + + "configured as 0 or 1 (because 0 or 1, we think the user wants to read the data in normal mode instead of reading in mode %s, the difference is big). 
If it is greater " + + "than" + + " 1, it means to read the latest corresponding number of versions.", + mode, mode)); + } + return mode; + } + + /** + * 注意:convertUserStartRowkey 和 convertInnerStartRowkey,前者会受到 isBinaryRowkey 的影响,只用于第一次对用户配置的 String 类型的 rowkey 转为二进制时使用。而后者约定:切分时得到的二进制的 rowkey 回填到配置中时采用 + */ + public static byte[] convertInnerStartRowkey(Configuration configuration) { + String startRowkey = configuration.getString(Key.START_ROWKEY); + if (StringUtils.isBlank(startRowkey)) { + return HConstants.EMPTY_BYTE_ARRAY; + } + + return Bytes.toBytesBinary(startRowkey); + } + + public static byte[] convertInnerEndRowkey(Configuration configuration) { + String endRowkey = configuration.getString(Key.END_ROWKEY); + if (StringUtils.isBlank(endRowkey)) { + return HConstants.EMPTY_BYTE_ARRAY; + } + + return Bytes.toBytesBinary(endRowkey); + } + + private static void setObHBaseConfig(com.alibaba.datax.common.util.Configuration confFile, org.apache.hadoop.conf.Configuration oHbaseConf) { + + boolean odpMode = confFile.getBool(Key.USE_ODP_MODE); + String username = confFile.getString(Key.USERNAME); + String password = confFile.getString(Key.PASSWORD); + String dbName = confFile.getString(Key.DB_NAME); + +// oHbaseConf.set(RS_LIST_ACQUIRE_CONNECT_TIMEOUT.getKey(), "500"); +// oHbaseConf.set(RS_LIST_ACQUIRE_READ_TIMEOUT.getKey(), "5000"); + oHbaseConf.set(HBASE_OCEANBASE_FULL_USER_NAME, username); + oHbaseConf.set(HBASE_OCEANBASE_PASSWORD, password); +// oHbaseConf.set(HBASE_, META_SCANNER_CACHING); + if (odpMode) { + oHbaseConf.setBoolean(HBASE_OCEANBASE_ODP_MODE, true); + oHbaseConf.set(HBASE_OCEANBASE_DATABASE, dbName); + oHbaseConf.set(HBASE_OCEANBASE_ODP_ADDR, confFile.getString(Key.ODP_HOST)); + oHbaseConf.setInt(HBASE_OCEANBASE_ODP_PORT, confFile.getInt(Key.ODP_PORT)); + } else { + String clusterName = null; + final Pattern pattern = Pattern.compile("([\\w]+)@([\\w]+)#([\\w]+)"); + Matcher matcher = pattern.matcher(username); + if (matcher.find()) { + 
clusterName = matcher.group(3); + } else { + throw new RuntimeException("user name is not in the correct format: user@tenant#cluster"); + } + String configUrl = confFile.getString(Key.CONFIG_URL); + if (!configUrl.contains("ObRegion")) { + if (configUrl.contains("?")) { + configUrl += "&ObRegion=" + clusterName; + } else { + configUrl += "?ObRegion=" + clusterName; + } + } + + if (!configUrl.contains("database")) { + configUrl += "&database=" + dbName; + } + oHbaseConf.set(HBASE_OCEANBASE_PARAM_URL, configUrl); + oHbaseConf.set(HBASE_OCEANBASE_SYS_USER_NAME, confFile.getString(Key.OB_SYS_USERNAME)); + oHbaseConf.set(HBASE_OCEANBASE_SYS_PASSWORD, confFile.getString(Key.OB_SYS_PASSWORD)); + } + + String hbaseConf = confFile.getString(Key.HBASE_CONFIG); + Map map = JSON.parseObject(hbaseConf, new TypeReference>() { + }); + if (MapUtils.isNotEmpty(map)) { + for (Map.Entry entry : map.entrySet()) { + oHbaseConf.set(entry.getKey(), entry.getValue()); + } + } + } + + /** + * 每次都获取一个新的HTable 注意:HTable 本身是线程不安全的 + */ + public static OHTable initOHtable(com.alibaba.datax.common.util.Configuration configuration) { + String tableName = configuration.getString(Key.TABLE); + try { + org.apache.hadoop.conf.Configuration oHbaseConf = new org.apache.hadoop.conf.Configuration(); + setObHBaseConfig(configuration, oHbaseConf); + return HTableManager.createHTable(oHbaseConf, tableName); + } catch (Exception e) { + LOG.error("init ohTable error, reason: {}", e.getMessage(), e); + throw DataXException.asDataXException(HbaseReaderErrorCode.INIT_TABLE_ERROR, e); + } + } + + public static boolean isRowkeyColumn(String columnName) { + return Constant.ROWKEY_FLAG.equalsIgnoreCase(columnName); + } + + public static String parseColumnFamily(Collection hbaseColumnCells) { + for (HbaseColumnCell columnCell : hbaseColumnCells) { + if (ObHbaseReaderUtil.isRowkeyColumn(columnCell.getColumnName())) { + continue; + } + if (columnCell.getColumnName() == null || 
columnCell.getColumnName().split(":").length != 2) { + LOG.error("column cell format is unknown: {}", columnCell); + throw new RuntimeException("Column cell format is unknown: " + columnCell); + } + return columnCell.getColumnName().split(":")[0]; + } + throw new RuntimeException("parse column family failed."); + } + + /** + * 用于解析列配置 + */ + public static LinkedHashMap parseColumn(List column) { + return parseColumn(column, null, Constant.DEFAULT_ENCODING, Constant.DEFAULT_TIMEZONE); + } + + public static LinkedHashMap parseColumn(List column, Map constantMap, String encoding, String timezone) { + LinkedHashMap hbaseColumnCells = new LinkedHashMap<>(column.size()); + boolean cacheConstantValue = constantMap != null; + HbaseColumnCell oneColumnCell; + try { + for (Map aColumn : column) { + ColumnType type = ColumnType.getByTypeName(aColumn.get("type")); + boolean isRowKey = isRowkeyColumn(aColumn.get("name")); + String columnName = isRowKey ? Constant.ROWKEY_FLAG : aColumn.get("name"); + + String columnValue = aColumn.get("value"); + String dateFormat = aColumn.getOrDefault("format", Constant.DEFAULT_DATE_FORMAT); + Validate.isTrue(StringUtils.isNotBlank(columnName) || StringUtils.isNotBlank(columnValue), + "It is either a combination of type + name + format or a combination of type + value + format. Your configuration is neither of the two. 
Please check and modify it."); + if (type == ColumnType.DATE) { + if (StringUtils.isBlank(dateFormat)) { + LOG.warn("date format for {} is empty, use default date format 'yyyy-MM-dd HH:mm:ss' instead.", columnName); + } + oneColumnCell = new HbaseColumnCell.Builder(type).columnName(columnName).columnValue(columnValue).dateformat(dateFormat).build(); + } else { + oneColumnCell = new HbaseColumnCell.Builder(type).columnName(columnName).columnValue(columnValue).build(); + } + hbaseColumnCells.put(columnName, oneColumnCell); + if (cacheConstantValue && oneColumnCell.isConstant()) { + constantMap.put(columnName, buildColumn(columnValue, type, encoding, dateFormat, timezone)); + } + } + return hbaseColumnCells; + } catch (Exception e) { + LOG.error("parse column failed, reason:{}", e.getMessage(), e); + throw DataXException.asDataXException(HbaseReaderErrorCode.PARSE_COLUMN_ERROR, e.getMessage()); + } + } + + public static Column buildColumn(String columnValue, ColumnType columnType, String encoding, String dateformat, String timezone) throws Exception { + return buildColumn(columnValue.getBytes(encoding), columnType, encoding, dateformat, timezone); + } + + public static Column buildColumn(byte[] columnValue, ColumnType columnType, String encoding, String dateformat, String timezone) throws Exception { + switch (columnType) { + case BOOLEAN: + return new BoolColumn(columnValue == null ? null : Bytes.toBoolean(columnValue)); + case SHORT: + return new LongColumn(columnValue == null ? null : String.valueOf(Bytes.toShort(columnValue))); + case INT: + return new LongColumn(columnValue == null ? null : Bytes.toInt(columnValue)); + case LONG: + return new LongColumn(columnValue == null ? null : Bytes.toLong(columnValue)); + case BYTES: + return new BytesColumn(columnValue == null ? null : columnValue); + case FLOAT: + return new DoubleColumn(columnValue == null ? null : Bytes.toFloat(columnValue)); + case DOUBLE: + return new DoubleColumn(columnValue == null ? 
null : Bytes.toDouble(columnValue)); + case STRING: + return new StringColumn(columnValue == null ? null : new String(columnValue, encoding)); + case BINARY_STRING: + return new StringColumn(columnValue == null ? null : Bytes.toStringBinary(columnValue)); + case DATE: + String dateValue = Bytes.toStringBinary(columnValue); + String timestamp = null; + try { + long milliSec = Long.parseLong(dateValue); + Date date = new java.util.Date(milliSec); + SimpleDateFormat sdf = new java.text.SimpleDateFormat(dateformat); + sdf.setTimeZone(java.util.TimeZone.getTimeZone(timezone)); + timestamp = sdf.format(date); + } catch (Exception e) { + // this is already formatted timestamp + timestamp = dateValue; + } + return columnValue == null ? null : new DateColumn(DateUtils.parseDate(timestamp, dateformat)); + default: + throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "obHbasereader 不支持您配置的列类型:" + columnType); + } + } +} diff --git a/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/SqlReaderSplitUtil.java b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/SqlReaderSplitUtil.java new file mode 100644 index 0000000000..bd589500a4 --- /dev/null +++ b/obhbasereader/src/main/java/com/alibaba/datax/plugin/reader/obhbasereader/util/SqlReaderSplitUtil.java @@ -0,0 +1,190 @@ +package com.alibaba.datax.plugin.reader.obhbasereader.util; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.reader.Constant; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.SplitedSlice; +import com.alibaba.datax.plugin.reader.obhbasereader.Key; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ExecutorTemplate; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils; +import 
com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartInfo; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartitionSplitUtil; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.commons.collections.CollectionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SqlReaderSplitUtil { + public static final String SAMPLE_SQL_TEMPLATE = "SELECT `hex` FROM (SELECT `hex`,K , bucket, ROW_NUMBER() OVER (PARTITION BY bucket ORDER BY K) rn FROM(SELECT %s `hex`, K ,NTILE(%s) OVER " + + "(ORDER BY K ) bucket FROM (SELECT hex(K) as `hex`, K FROM %s SAMPLE BLOCK(%s)) a) b) c WHERE rn = 1 GROUP BY K ORDER BY K"; + public static final String MIDDLE_RANGE_TEMPLATE = "((K) > (unhex('%s'))) AND ((K) <= (unhex('%s')))"; + public static final String MIN_MAX_RANGE_TEMPLATE = "((K)<= (unhex('%s'))) or ((K) > (unhex('%s')))"; + private static final Logger LOG = LoggerFactory.getLogger(SqlReaderSplitUtil.class); + + public static List splitSingleTable(Configuration configuration, String tableName, String columnFamily, int eachTableShouldSplittedNumber, boolean readByPartition) { + List partitionList = Lists.newArrayList(); + String tableNameWithCf = tableName + "$" + columnFamily; + PartInfo partInfo = PartitionSplitUtil.getObMySQLPartInfoBySQL(configuration, tableNameWithCf); + if (partInfo.isPartitionTable()) { + partitionList.addAll(partInfo.getPartList()); + } + // read all partitions and split job only by partition + if (readByPartition) { + LOG.info("table: [{}] will read only by partition", tableNameWithCf); + return splitSingleTableByPartition(configuration, partitionList); + } + + if 
(eachTableShouldSplittedNumber <= 1) { + LOG.info("total enable splitted number of table: [{}] is {}, no need to split", tableNameWithCf, eachTableShouldSplittedNumber); + return Lists.newArrayList(configuration); + } + + // If user specified some partitions to be read, + List userSetPartitions = configuration.getList(Key.PARTITION_NAME, String.class); + if (CollectionUtils.isNotEmpty(userSetPartitions)) { + Set partSet = new HashSet<>(partitionList); + // If partition name does not exist in the table, throw exception directly. Case is sensitive. + userSetPartitions.forEach(e -> Preconditions.checkArgument(partSet.contains(e), "partition %s does not exist in table: %s", e, tableNameWithCf)); + partitionList.clear(); + partitionList.addAll(userSetPartitions); + } + + if (partitionList.isEmpty()) { + LOG.info("table: [{}] is not partitioned, just split table by rowKey.", tableNameWithCf); + List splitConfs = splitSingleTableByRowKey(configuration, tableNameWithCf, eachTableShouldSplittedNumber); + LOG.info("total split count of non-partitioned table :[{}] is {}", tableNameWithCf, splitConfs.size()); + return splitConfs; + } else { + ExecutorTemplate> template = new ExecutorTemplate<>("split-rows-by-rowkey-" + tableNameWithCf + "-", eachTableShouldSplittedNumber); + int splitNumPerPartition = (int) Math.ceil(1.0d * eachTableShouldSplittedNumber / partitionList.size()); + LOG.info("table: [{}] is partitioned, split table by rowKey in parallel. 
splitNumPerPartition is {}", tableNameWithCf, splitNumPerPartition); + for (String partName : partitionList) { + try { + template.submit(() -> { + Configuration tempConf = configuration.clone(); + tempConf.set(Key.PARTITION_NAME, partName); + return splitSingleTableByRowKey(tempConf, tableNameWithCf, splitNumPerPartition); + }); + } catch (Throwable th) { + LOG.error("submit split task of table: [{}-{}] failed, reason: {}", tableNameWithCf, partName, th.getMessage(), th); + } + } + List splitConfs = template.waitForResult().stream().flatMap(Collection::stream).collect(Collectors.toList()); + LOG.info("total split count of partitioned table :[{}] is {}", tableNameWithCf, splitConfs.size()); + return splitConfs; + } + } + + private static List splitSingleTableByPartition(Configuration configuration, List partList) { + if (partList == null || partList.isEmpty()) { + return Lists.newArrayList(configuration); + } + List confList = new ArrayList<>(); + for (String partName : partList) { + LOG.info("read sub task: reading from partition " + partName); + Configuration conf = configuration.clone(); + conf.set(Key.PARTITION_NAME, partName); + confList.add(conf); + } + return confList; + } + + /** + * @param configuration + * @param tableNameWithCf + * @param eachTableShouldSplittedNumber + * @return + */ + public static List splitSingleTableByRowKey(Configuration configuration, String tableNameWithCf, int eachTableShouldSplittedNumber) { + String jdbcURL = configuration.getString(Key.JDBC_URL); + String username = configuration.getString(Key.USERNAME); + String password = configuration.getString(Key.PASSWORD); + String hint = configuration.getString(Key.READER_HINT, com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_READ_HINT); + String partInfo = ""; + String partName = configuration.getString(Key.PARTITION_NAME, null); + if (partName != null) { + partInfo = " partition(" + partName + ")"; + } + tableNameWithCf += partInfo; + int fetchSize = 
configuration.getInt(Constant.FETCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE); + Double percentage = configuration.getDouble(Key.SAMPLE_PERCENTAGE, 0.1); + List slices = new ArrayList<>(); + List pluginParams = new ArrayList<>(); + // set ob_query_timeout and ob_trx_timeout to a large time in case timeout + int queryTimeoutSeconds = 60 * 60 * 48; + try (Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcURL, username, password)) { + String setQueryTimeout = "set ob_query_timeout=" + (queryTimeoutSeconds * 1000 * 1000L); + String setTrxTimeout = "set ob_trx_timeout=" + ((queryTimeoutSeconds + 5) * 1000 * 1000L); + try (Statement stmt = conn.createStatement()) { + stmt.execute(setQueryTimeout); + stmt.execute(setTrxTimeout); + } catch (Exception e) { + LOG.warn("set ob_query_timeout and set ob_trx_timeout failed. reason: {}", e.getMessage(), e); + } + slices = getSplitSqlBySample(conn, tableNameWithCf, fetchSize, percentage, eachTableShouldSplittedNumber, hint); + } catch (Throwable e) { + LOG.warn("query rowkey range failed of table: {}. reason: {}. 
the table will not be splitted.", tableNameWithCf, e.getMessage(), e); + } + + if (!slices.isEmpty()) { + for (SplitedSlice slice : slices) { + Configuration tempConfig = configuration.clone(); + tempConfig.set(Key.RANGE, slice.getRange()); + pluginParams.add(tempConfig); + } + } else { + Configuration tempConfig = configuration.clone(); + pluginParams.add(tempConfig); + } + return pluginParams; + } + + /** + * 按照采样方法切分,不能直接顺序切分否则可能导致原本属于一行的数据被切分为两行 + * + * @param conn + * @param tableName + * @param fetchSize + * @param percentage + * @param adviceNum + * @param hint + * @return List + * @throws SQLException + */ + private static List getSplitSqlBySample(Connection conn, String tableName, int fetchSize, double percentage, int adviceNum, String hint) throws SQLException { + String splitSql = String.format(SAMPLE_SQL_TEMPLATE, hint, adviceNum, tableName, percentage); + LOG.info("split pk [sql={}] is running... ", splitSql); + List boundList = new ArrayList<>(); + try (ResultSet rs = DBUtil.query(conn, splitSql, fetchSize)) { + while (rs.next()) { + boundList.add(rs.getString(1)); + } + } + if (boundList.size() == 0) { + return new ArrayList<>(); + } + List rangeSql = new ArrayList<>(); + for (int i = 0; i < boundList.size() - 1; i++) { + String range = String.format(MIDDLE_RANGE_TEMPLATE, boundList.get(i), boundList.get(i + 1)); + SplitedSlice slice = new SplitedSlice(boundList.get(i), boundList.get(i + 1), range); + rangeSql.add(slice); + } + String range = String.format(MIN_MAX_RANGE_TEMPLATE, boundList.get(0), boundList.get(boundList.size() - 1)); + SplitedSlice slice = new SplitedSlice(null, null, range); + rangeSql.add(slice); + return rangeSql; + } +} diff --git a/obhbasereader/src/main/resources/plugin.json b/obhbasereader/src/main/resources/plugin.json new file mode 100755 index 0000000000..36d52d69ad --- /dev/null +++ b/obhbasereader/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "obhbasereader", + "class": 
"com.alibaba.datax.plugin.reader.obhbasereader.ObHbaseReader", + "description": "useScene: prod. mechanism: Scan to read data.", + "developer": "alibaba" +} diff --git a/obhbasereader/src/main/resources/plugin_job_template.json b/obhbasereader/src/main/resources/plugin_job_template.json new file mode 100644 index 0000000000..e8adb94510 --- /dev/null +++ b/obhbasereader/src/main/resources/plugin_job_template.json @@ -0,0 +1,15 @@ +{ + "name": "obhbasereader", + "parameter": { + "hbaseConfig": {}, + "table": "", + "encoding": "", + "mode": "", + "column": [], + "range": { + "startRowkey": "", + "endRowkey": "" + }, + "isBinaryRowkey": true + } +} \ No newline at end of file diff --git a/obhbasewriter/doc/obhbasewriter.md b/obhbasewriter/doc/obhbasewriter.md new file mode 100644 index 0000000000..8c2a6749cb --- /dev/null +++ b/obhbasewriter/doc/obhbasewriter.md @@ -0,0 +1,209 @@ +OceanBase的table api为应用提供了ObHBase的访问接口,因此,OceanBase table api的reader与HBase writer的结构和配置方法类似。 +1 快速介绍 +obhbaseWriter 插件实现了从向ObHbase中写取数据。在底层实现上,obhbaseWriter 通过 HBase 的 Java 客户端连接远程 HBase 服务,并通过 put 方式写入obHbase。 +1.1支持功能 +1、目前obhbasewriter支持的obHbase版本为OceanBase3.x以及4.x版本。 +2、目前obhbasewriter支持源端多个字段拼接作为ObHbase 表的 rowkey,具体配置参考:rowkeyColumn配置; +3、写入obhbase的时间戳(版本)支持:用当前时间作为版本,指定源端列作为版本,指定一个时间 三种方式作为版本; +#### 脚本配置 +```json +{ + "job": { + "setting": { + "speed": { + "channel": 5 + } + }, + "content": [ + { + "reader": { + "name": "txtfilereader", + "parameter": { + "path": "/normal.txt", + "charset": "UTF-8", + "column": [ + { + "index": 0, + "type": "String" + }, + { + "index": 1, + "type": "string" + }, + { + "index": 2, + "type": "string" + }, + { + "index": 3, + "type": "string" + }, + { + "index": 4, + "type": "string" + }, + { + "index": 5, + "type": "string" + }, + { + "index": 6, + "type": "string" + } + + ], + "fieldDelimiter": "," + } + }, + "writer": { + "name": "obhbasewriter", + "parameter": { + "username": "username", + "password": "password", + "writerThreadCount": "20", + 
"writeBufferHighMark": "2147483647", + "rpcExecuteTimeout": "30000", + "useOdpMode": "false", + "obSysUser": "root", + "obSysPassword": "", + "column": [ + { + "index": 0, + "name": "family1:c1", + "type": "string" + }, + { + "index": 1, + "name": "family1:c2", + "type": "string" + }, + { + "index": 2, + "name": "family1:c3", + "type": "string" + }, + { + "index": 3, + "name": "family1:c4", + "type": "string" + }, + { + "index": 4, + "name": "family1:c5", + "type": "string" + }, + { + "index": 5, + "name": "family1:c6", + "type": "string" + }, + { + "index": 6, + "name": "family1:c7", + "type": "string" + } + ], + "mode": "normal", + "rowkeyColumn": [ + { + "index": 0, + "type": "string" + }, + { + "index": 3, + "type": "string" + }, + { + "index": 2, + "type": "string" + }, + { + "index": 1, + "type": "string" + } + ], + "table": "htable3", + "batchSize": "200", + "dbName": "database", + "jdbcUrl": "jdbc:mysql://ip:port/database?" + } + } + } + ] + } +} +``` +##### 参数解释 + +- **connection** + +公有云和私有云需要配置的信息不同,具体如下: +公有云: + +- 数据库用户名;(在外层统一配置) +- 用户密码;(在外层统一配置) +- proxy的jdbc地址 +- 数据库名称; + +私有云: + +- 数据库用户名;(在外层统一配置) +- 用户密码;(在外层统一配置) +- proxy的jdbc地址 +- obSysUser:sys租户的用户名; +- obSysPass:sys租户的密码; +- configUrl; + - 描述:可以通过show parameters like 'obConfigUrl' 获得。 + - 必须:是 + - 默认值:无 +- **jdbcUrl** + - 描述:连接ob使用的jdbc url,支持如下两种格式: + - jdbc:mysql://obproxyIp:obproxyPort/db + - 此格式下username需要写成三段式格式 + - ||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/db + - 此格式下username仅填写用户名本身,无需三段式写法 + - 必选:是 + - 默认值:无 +- **table** + - 描述:所选取的需要同步的表。无需增加列族信息。 + - 必选:是 + - 默认值:无 +- **username** + - 描述:访问OceanBase的用户名 + - 必选:是 + - 默认值:无 +- **useOdpMode** + - 描述:是否通过proxy连接。无法提供sys租户帐密时需要设置为true + - 必须:否 + - 默认值:false +- **column** + - 描述:要写入的hbase字段。index:指定该列对应reader端column的索引,从0开始;name:指定hbase表中的列,必须为 列族:列名 的格式;type:指定写入数据类型,用于转换HBase byte[]。配置格式如下: +```json +"column": [ { "index":1, "name": "cf1:q1", "type": "string" }, { "index":2, "name": "cf1:q2", "type": 
"string" } ] +``` + +- 必选:是 + - 默认值:无 +- **rowkeyColumn** + - 描述:要写入的ObHbase的rowkey列。index:指定该列对应reader端column的索引,从0开始,若为常量index为-1;type:指定写入数据类型,用于转换HBase byte[];value:配置常量,常作为多个字段的拼接符。obhbasewriter会将rowkeyColumn中所有列按照配置顺序进行拼接作为写入hbase的rowkey,不能全为常量。配置格式如下: +```json +"rowkeyColumn": [ { "index":0, "type":"string" }, { "index":-1, "type":"string", "value":"_" } ] +``` + +- 必选:是 + - 默认值:无 +- **versionColumn** + - 描述:指定写入obhbase的时间戳。支持:当前时间、指定时间列,指定时间,三者选一。若不配置表示用当前时间。index:指定对应reader端column的索引,从0开始,需保证能转换为long,若是Date类型,会尝试用yyyy-MM-dd HH:mm:ss和yyyy-MM-dd HH:mm:ss SSS去解析;若为指定时间index为-1;value:指定时间的值,long值。配置格式如下: +```json +"versionColumn":{ "index":1 } +``` +或者 +```json +"versionColumn":{ "index":-1, "value":123456789 } +``` + +- 必选:否 +- 默认值:无 + + + diff --git a/obhbasewriter/pom.xml b/obhbasewriter/pom.xml new file mode 100644 index 0000000000..b6a29f92c2 --- /dev/null +++ b/obhbasewriter/pom.xml @@ -0,0 +1,181 @@ + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + obhbasewriter + + com.alibaba.datax + 0.0.1-SNAPSHOT + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + com.alibaba.datax + plugin-rdbms-util + ${datax-project-version} + + + guava + com.google.guava + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + org.springframework + spring-test + 4.0.4.RELEASE + test + + + + + com.google.guava + guava + 33.1.0-jre + + + + + + log4j + log4j + 1.2.16 + + + + org.json + json + 20160810 + + + junit + junit + 4.11 + test + + + org.powermock + powermock-module-junit4 + 1.4.10 + test + + + org.powermock + powermock-api-mockito + 1.4.10 + test + + + org.mockito + mockito-core + 1.8.5 + test + + + + com.oceanbase + obkv-hbase-client + 0.1.4.2 + + + guava + com.google.guava + + + + + + org.apache.hadoop + hadoop-core + 1.0.3 + + + + + + + + src/main/java + + **/*.properties + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + 
+ maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/obhbasewriter/src/main/assembly/package.xml b/obhbasewriter/src/main/assembly/package.xml new file mode 100644 index 0000000000..fd05bea3ca --- /dev/null +++ b/obhbasewriter/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/obhbasewriter + + + target/ + + obhbasewriter-0.0.1-SNAPSHOT.jar + + plugin/writer/obhbasewriter + + + + + + false + plugin/writer/obhbasewriter/libs + runtime + + + diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ColumnType.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ColumnType.java new file mode 100755 index 0000000000..e451071e80 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ColumnType.java @@ -0,0 +1,50 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.MessageSource; +import java.util.Arrays; + + + +import org.apache.commons.lang.StringUtils; + +/** + * 只对 normal 模式读取时有用,多版本读取时,不存在列类型的 + */ +public enum ColumnType { + STRING("string"), + BINARY_STRING("binarystring"), + BYTES("bytes"), + BOOLEAN("boolean"), + SHORT("short"), + INT("int"), + LONG("long"), + FLOAT("float"), + DOUBLE("double"), + DATE("date"), + BINARY("binary"); + + private String typeName; + + ColumnType(String typeName) { + this.typeName = typeName; + } + + public static ColumnType getByTypeName(String typeName) { + if (StringUtils.isBlank(typeName)) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ColumnType.class).message("columntype.1", typeName, Arrays.asList(values()))); + } + for (ColumnType columnType : values()) { + if 
(StringUtils.equalsIgnoreCase(columnType.typeName, typeName.trim())) { + return columnType; + } + } + + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ColumnType.class).message("columntype.1", typeName, Arrays.asList(values()))); + } + + @Override + public String toString() { + return this.typeName; + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Config.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Config.java new file mode 100644 index 0000000000..2a37d5d5ea --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Config.java @@ -0,0 +1,42 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +public interface Config { + + String MEMSTORE_THRESHOLD = "memstoreThreshold"; + + double DEFAULT_MEMSTORE_THRESHOLD = 0.9d; + + String MEMSTORE_CHECK_INTERVAL_SECOND = "memstoreCheckIntervalSecond"; + + long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30; + + String FAIL_TRY_COUNT = "failTryCount"; + + int DEFAULT_FAIL_TRY_COUNT = 10000; + + String WRITER_THREAD_COUNT = "writerThreadCount"; + + int DEFAULT_WRITER_THREAD_COUNT = 5; + + String CONCURRENT_WRITE = "concurrentWrite"; + + boolean DEFAULT_CONCURRENT_WRITE = true; + + String RS_URL = "rsUrl"; + + String OB_VERSION = "obVersion"; + + String TIMEOUT = "timeout"; + + String PRINT_COST = "printCost"; + + boolean DEFAULT_PRINT_COST = false; + + String COST_BOUND = "costBound"; + + long DEFAULT_COST_BOUND = 20; + + String MAX_ACTIVE_CONNECTION = "maxActiveConnection"; + + int DEFAULT_MAX_ACTIVE_CONNECTION = 2000; +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigKey.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigKey.java new file mode 100755 index 0000000000..bd06524f3a --- /dev/null +++ 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigKey.java @@ -0,0 +1,78 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +public final class ConfigKey { + + public final static String HBASE_CONFIG = "hbaseConfig"; + + public final static String TABLE = "table"; + + public final static String DBNAME = "dbName"; + + public final static String OBCONFIG_URL = "obConfigUrl"; + + public final static String JDBC_URL = "jdbcUrl"; + /** + * mode 可以取 normal 或者 multiVersionFixedColumn 或者 multiVersionDynamicColumn 三个值,无默认值。 + * + * normal 配合 column(Map 结构的)使用 + * + * multiVersion + */ + public final static String MODE = "mode"; + + public final static String ROWKEY_COLUMN = "rowkeyColumn"; + + public final static String VERSION_COLUMN = "versionColumn"; + + /** + * 默认为 utf8 + */ + public final static String ENCODING = "encoding"; + + public final static String COLUMN = "column"; + + public static final String INDEX = "index"; + + public static final String NAME = "name"; + + public static final String TYPE = "type"; + + public static final String VALUE = "value"; + + public static final String FORMAT = "format"; + + /** + * 默认为 EMPTY_BYTES + */ + public static final String NULL_MODE = "nullMode"; + + public static final String TRUNCATE = "truncate"; + + public static final String AUTO_FLUSH = "autoFlush"; + + public static final String WAL_FLAG = "walFlag"; + + public static final String WRITE_BUFFER_SIZE = "writeBufferSize"; + + public static final String MAX_RETRY_COUNT = "maxRetryCount"; + + public static final String USE_ODP_MODE = "useOdpMode"; + + public static final String OB_SYS_USER = "obSysUser"; + + public static final String OB_SYS_PASSWORD = "obSysPassword"; + + public static final String ODP_HOST = "odpHost"; + + public static final String ODP_PORT = "odpPort"; + + public static final String OBHBASE_HTABLE_CLIENT_WRITE_BUFFER = "obhbaseClientWriteBuffer"; + + public static final String 
OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK = "obhbaseHtablePutWriteBufferCheck"; + + public static final String WRITE_BUFFER_LOW_MARK = "writeBufferLowMark"; + + public static final String WRITE_BUFFER_HIGH_MARK = "writeBufferHighMark"; + + public static final String TABLE_CLIENT_RPC_EXECUTE_TIMEOUT = "rpcExecuteTimeout"; +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigValidator.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigValidator.java new file mode 100644 index 0000000000..22224d4b2c --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ConfigValidator.java @@ -0,0 +1,110 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.rdbms.writer.Key; + +import java.nio.charset.Charset; +import java.util.List; + +/** + * Created by johnxu.xj on Sept 30 2018 + */ +public class ConfigValidator { + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ConfigValidator.class); + + public static void validateParameter(com.alibaba.datax.common.util.Configuration originalConfig) { + originalConfig.getNecessaryValue(Key.USERNAME, Hbase094xWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(Key.PASSWORD, Hbase094xWriterErrorCode.REQUIRED_VALUE); +// originalConfig.getNecessaryValue(ConfigKey.OBCONFIG_URL, Hbase094xWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(ConfigKey.TABLE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(ConfigKey.DBNAME, Hbase094xWriterErrorCode.REQUIRED_VALUE); + + ConfigValidator.validateMode(originalConfig); + + String encoding = originalConfig.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING); + if (!Charset.isSupported(encoding)) 
{ + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.9", encoding)); + } + originalConfig.set(ConfigKey.ENCODING, encoding); + } + + public static void validateMode(com.alibaba.datax.common.util.Configuration originalConfig) { + String mode = originalConfig.getNecessaryValue(ConfigKey.MODE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + ModeType modeType = ModeType.getByTypeName(mode); + if (ModeType.Normal.equals(modeType)) { + validateRowkeyColumn(originalConfig); + validateColumn(originalConfig); + validateVersionColumn(originalConfig); + } + + if (originalConfig.getBool(ConfigKey.USE_ODP_MODE)) { + originalConfig.getNecessaryValue(ConfigKey.ODP_HOST, Hbase094xWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(ConfigKey.ODP_PORT, Hbase094xWriterErrorCode.REQUIRED_VALUE); + } else { + originalConfig.getNecessaryValue(ConfigKey.OBCONFIG_URL, Hbase094xWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(ConfigKey.OB_SYS_USER, Hbase094xWriterErrorCode.REQUIRED_VALUE); + } + } + + public static void validateColumn(com.alibaba.datax.common.util.Configuration originalConfig) { + List columns = originalConfig.getListConfiguration(ConfigKey.COLUMN); + if (columns == null || columns.isEmpty()) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.11")); + } + for (Configuration aColumn : columns) { + Integer index = aColumn.getInt(ConfigKey.INDEX); + String type = aColumn.getNecessaryValue(ConfigKey.TYPE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + String name = aColumn.getNecessaryValue(ConfigKey.NAME, Hbase094xWriterErrorCode.REQUIRED_VALUE); + ColumnType.getByTypeName(type); + if (name.split(":").length != 2) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.12", name)); + } + if (index == null || index < 0) { + throw 
DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.13")); + } + } + } + + public static void validateRowkeyColumn(com.alibaba.datax.common.util.Configuration originalConfig) { + List rowkeyColumn = originalConfig.getListConfiguration(ConfigKey.ROWKEY_COLUMN); + if (rowkeyColumn == null || rowkeyColumn.isEmpty()) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.14")); + } + int rowkeyColumnSize = rowkeyColumn.size(); + //包含{"index":0,"type":"string"} 或者 {"index":-1,"type":"string","value":"_"} + for (Configuration aRowkeyColumn : rowkeyColumn) { + Integer index = aRowkeyColumn.getInt(ConfigKey.INDEX); + String type = aRowkeyColumn.getNecessaryValue(ConfigKey.TYPE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + ColumnType.getByTypeName(type); + if (index == null) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.15")); + } + //不能只有-1列,即rowkey连接串 + if (rowkeyColumnSize == 1 && index == -1) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.16")); + } + if (index == -1) { + aRowkeyColumn.getNecessaryValue(ConfigKey.VALUE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + } + } + } + + public static void validateVersionColumn(com.alibaba.datax.common.util.Configuration originalConfig) { + Configuration versionColumn = originalConfig.getConfiguration(ConfigKey.VERSION_COLUMN); + //为null,表示用当前时间;指定列,需要index + if (versionColumn != null) { + Integer index = versionColumn.getInt(ConfigKey.INDEX); + if (index == null) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.17")); + } + if (index == -1) { + //指定时间,需要index=-1,value + versionColumn.getNecessaryValue(ConfigKey.VALUE, Hbase094xWriterErrorCode.REQUIRED_VALUE); + } else if (index < 0) 
{ + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.18")); + } + } + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Constant.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Constant.java new file mode 100755 index 0000000000..910855953d --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Constant.java @@ -0,0 +1,27 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import ch.qos.logback.classic.Level; + +public final class Constant { + public static final String DEFAULT_ENCODING = "UTF-8"; + public static final String DEFAULT_DATA_FORMAT = "yyyy-MM-dd HH:mm:ss"; + public static final String DEFAULT_NULL_MODE = "skip"; + public static final long DEFAULT_WRITE_BUFFER_SIZE = 8 * 1024 * 1024; + public static final long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30; + public static final double DEFAULT_MEMSTORE_THRESHOLD = 0.9d; + public static final int DEFAULT_FAIL_TRY_COUNT = 10000; + public static final String OB_TABLE_CLIENT_PROPERTY = "logging.path.com.alipay.oceanbase-table-client"; + public static final String OB_TABLE_HBASE_PROPERTY = "logging.path.com.alipay.oceanbase-table-hbase"; + public static final String OB_TABLE_CLIENT_LOG_LEVEL = "logging.level.oceanbase-table-client"; + public static final String OB_TABLE_HBASE_LOG_LEVEL = "logging.level.oceanbase-table-hbase"; + public static final String OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-client"; + public static final String OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-hbase"; + public static final String OB_HBASE_LOG_PATH = System.getProperty("datax.home") + "/log/"; + public static final String DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL = Level.OFF.toString(); + public static final String DEFAULT_OB_TABLE_HBASE_LOG_LEVEL = Level.OFF.toString(); + 
public static final String DEFAULT_NETTY_BUFFER_LOW_WATERMARK = Integer.toString(512 * 1024); + public static final String DEFAULT_NETTY_BUFFER_HIGH_WATERMARK = Integer.toString(1024 * 1024); + public static final String DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER = "2097152"; + public static final String DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK = "10"; + public static final String DEFAULT_RPC_EXECUTE_TIMEOUT = "3000"; +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Hbase094xWriterErrorCode.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Hbase094xWriterErrorCode.java new file mode 100644 index 0000000000..08529c378c --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/Hbase094xWriterErrorCode.java @@ -0,0 +1,44 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import com.alibaba.datax.common.spi.ErrorCode; +import com.alibaba.datax.common.util.MessageSource; + +/** + * Created by shf on 16/3/8. 
+ */ +public enum Hbase094xWriterErrorCode implements ErrorCode { + REQUIRED_VALUE("Hbasewriter-00", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.required_value")), + ILLEGAL_VALUE("Hbasewriter-01", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.illegal_value")), + GET_HBASE_CONFIG_ERROR("Hbasewriter-02", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.get_hbase_config_error")), + GET_HBASE_TABLE_ERROR("Hbasewriter-03", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.get_hbase_table_error")), + CLOSE_HBASE_AMIN_ERROR("Hbasewriter-05", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.close_hbase_amin_error")), + CLOSE_HBASE_TABLE_ERROR("Hbasewriter-06", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.close_hbase_table_error")), + PUT_HBASE_ERROR("Hbasewriter-07", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.put_hbase_error")), + DELETE_HBASE_ERROR("Hbasewriter-08", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.delete_hbase_error")), + TRUNCATE_HBASE_ERROR("Hbasewriter-09", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.truncate_hbase_error")), + CONSTRUCT_ROWKEY_ERROR("Hbasewriter-10", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.construct_rowkey_error")), + CONSTRUCT_VERSION_ERROR("Hbasewriter-11", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.construct_version_error")), + INIT_ERROR("Hbasewriter-12", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.init_error")); + private final String code; + private final String description; + + private Hbase094xWriterErrorCode(String code, String description) { + this.code = code; + 
this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s].", this.code, this.description); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_en_US.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_en_US.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_ja_JP.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_CN.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_HK.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_TW.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ModeType.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ModeType.java new file mode 100644 index 0000000000..592a59a88f --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ModeType.java @@ -0,0 +1,30 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import java.util.Arrays; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.MessageSource; + +public enum ModeType { + Normal("normal"), + MultiVersion("multiVersion"); + + private String mode; + + ModeType(String mode) { + this.mode = mode.toLowerCase(); + } + + public String getMode() { + return mode; + } + + public static ModeType getByTypeName(String modeName) { + for (ModeType modeType : values()) { + if (modeType.mode.equalsIgnoreCase(modeName)) { + return modeType; + } + } + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ModeType.class).message("modetype.1", modeName, Arrays.asList(values()))); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/NullModeType.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/NullModeType.java new file mode 100644 index 0000000000..6514a1a44a --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/NullModeType.java @@ -0,0 +1,30 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import java.util.Arrays; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.MessageSource; + +public enum NullModeType { + Skip("skip"), + Empty("empty"); + + private String mode; + + NullModeType(String mode) { + this.mode = mode.toLowerCase(); + } + + public String getMode() { + return mode; + } + + public static NullModeType getByTypeName(String modeName) { + for 
(NullModeType modeType : values()) { + if (modeType.mode.equalsIgnoreCase(modeName)) { + return modeType; + } + } + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(NullModeType.class).message("nullmodetype.1", modeName, Arrays.asList(values()))); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHTableInfo.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHTableInfo.java new file mode 100644 index 0000000000..80b15ae9cd --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHTableInfo.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to + * the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more + * details. 
+ */ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.reader.Key; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.commons.lang3.tuple.Triple; + +/** + * @author cjyyz + * @date 2023/03/24 + * @since + */ +public class ObHTableInfo { + + /** + * 不带列族的表名,用于构建OHTable + */ + String tableName; + + /** + * 带列族的表名,用于分区计算 + */ + String fullHbaseTableName; + + NullModeType nullModeType; + + String encoding; + + List columns; + + /** + * 记录配置文件中的columns的列族名,字段名,字段类型,避免每次执行插入都解析 + * Triple left : 列族名;middle : 字段名;right:字段类型 + */ + LinkedHashMap> indexColumnInfoMap; + + /** + * 记录配置文件中rowKey的Index,常量值,字段类型,避免每次执行插入都解析 + * Triple left : Index;middle : 常量值;right:字段类型 + */ + List> rowKeyElementList; + + public ObHTableInfo(Configuration configuration) { + this.nullModeType = NullModeType.getByTypeName(configuration.getString(ConfigKey.NULL_MODE, Constant.DEFAULT_NULL_MODE)); + this.encoding = configuration.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING); + this.columns = configuration.getListConfiguration(ConfigKey.COLUMN); + this.indexColumnInfoMap = new LinkedHashMap<>(); + configuration.getListConfiguration(ConfigKey.COLUMN).forEach(e -> { + String[] name = e.getString(ConfigKey.NAME).split(":"); + indexColumnInfoMap.put(e.getInt(ConfigKey.INDEX), Triple.of(name[0], name[1], ColumnType.getByTypeName(e.getString(ConfigKey.TYPE))) + ); + }); + + this.rowKeyElementList = new ArrayList<>(); + configuration.getListConfiguration(ConfigKey.ROWKEY_COLUMN).forEach(e -> { + Integer index = e.getInt(ConfigKey.INDEX); + String constantValue = e.getString(ConfigKey.VALUE); + ColumnType columnType = ColumnType.getByTypeName(e.getString(ConfigKey.TYPE)); + rowKeyElementList.add(Triple.of(index, constantValue, columnType)); + + }); + + this.tableName = configuration.getString(Key.TABLE); + 
this.fullHbaseTableName = tableName; + if (!fullHbaseTableName.contains("$")) { + String name = columns.get(0).getString(ConfigKey.NAME); + String familyName = name.split(":")[0]; + fullHbaseTableName = fullHbaseTableName + "$" + familyName; + } + } + + public String getTableName() { + return tableName; + } + + public String getFullHbaseTableName() { + return fullHbaseTableName; + } + + public NullModeType getNullModeType() { + return nullModeType; + } + + public String getEncoding() { + return encoding; + } + + public Map> getIndexColumnInfoMap() { + return indexColumnInfoMap; + } + + public List> getRowKeyElementList() { + return rowKeyElementList; + } +} \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHbaseWriter.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHbaseWriter.java new file mode 100644 index 0000000000..555ce83638 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ObHbaseWriter.java @@ -0,0 +1,267 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter; + +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_OB_TABLE_HBASE_LOG_LEVEL; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_HBASE_LOG_PATH; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_CLIENT_LOG_LEVEL; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_CLIENT_PROPERTY; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_HBASE_LOG_LEVEL; +import static 
com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_HBASE_PROPERTY; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.reader.util.ObVersion; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter; +import com.alibaba.datax.plugin.rdbms.writer.Key; +import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo; +import com.alibaba.datax.plugin.writer.obhbasewriter.task.ObHBaseWriteTask; +import com.google.common.base.Preconditions; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; + +/** + * + */ +public class ObHbaseWriter extends Writer { + /** + * Job 中的方法仅执行一次,Task 中方法会由框架启动多个 Task 线程并行执行。 + * + * 整个 Writer 执行流程是: + * + * + * Job类init-->prepare-->split + * + * Task类init-->prepare-->startWrite-->post-->destroy + * Task类init-->prepare-->startWrite-->post-->destroy + * + * Job类post-->destroy + * + */ + public static class Job extends Writer.Job { + private Configuration originalConfig = null; + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + + /** + * 注意:此方法仅执行一次。 最佳实践:通常在这里对用户的配置进行校验:是否缺失必填项?有无错误值?有没有无关配置项?... 
+ * 并给出清晰的报错/警告提示。校验通常建议采用静态工具类进行,以保证本类结构清晰。 + */ + @Override + public void init() { + if (System.getProperty(OB_TABLE_CLIENT_PROPERTY) == null) { + LOG.info(OB_TABLE_CLIENT_PROPERTY + " not set"); + System.setProperty(OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH); + } + if (System.getProperty(OB_TABLE_HBASE_PROPERTY) == null) { + LOG.info(OB_TABLE_HBASE_PROPERTY + " not set"); + System.setProperty(OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH); + } + if (System.getProperty(OB_TABLE_CLIENT_LOG_LEVEL) == null) { + LOG.info(OB_TABLE_CLIENT_LOG_LEVEL + " not set"); + System.setProperty(OB_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL); + } + if (System.getProperty(OB_TABLE_HBASE_LOG_LEVEL) == null) { + LOG.info(OB_TABLE_HBASE_LOG_LEVEL + " not set"); + System.setProperty(OB_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL); + } + if (System.getProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL) == null) { + LOG.info(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL + " not set"); + System.setProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL); + } + if (System.getProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL) == null) { + LOG.info(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL + " not set"); + System.setProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL); + } + + LOG.info("{} is set to {}, {} is set to {}", + OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH, OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH); + this.originalConfig = super.getPluginJobConf(); + boolean useOdpMode = originalConfig.getBool(ConfigKey.USE_ODP_MODE, false); + String configUrl = originalConfig.getString(ConfigKey.OBCONFIG_URL, null); + String jdbcUrl = originalConfig.getString(ConfigKey.JDBC_URL, null); + jdbcUrl = DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl); + String user = originalConfig.getString(Key.USERNAME, null); + String password = originalConfig.getString(Key.PASSWORD); + ServerConnectInfo serverConnectInfo = new ServerConnectInfo(jdbcUrl, 
user, password); + if (useOdpMode) { + originalConfig.set(ConfigKey.ODP_HOST, serverConnectInfo.host); + originalConfig.set(ConfigKey.ODP_PORT, serverConnectInfo.port); + } else if (StringUtils.isBlank(configUrl)) { + serverConnectInfo.setSysUser(originalConfig.getString(ConfigKey.OB_SYS_USER)); + serverConnectInfo.setSysPass(originalConfig.getString(ConfigKey.OB_SYS_PASSWORD)); + try { + originalConfig.set(ConfigKey.OBCONFIG_URL, queryRsUrl(serverConnectInfo)); + originalConfig.set(ConfigKey.OB_SYS_USER, serverConnectInfo.sysUser); + originalConfig.set(ConfigKey.OB_SYS_PASSWORD, serverConnectInfo.sysPass); + LOG.info("fetch configUrl success, configUrl is {}", configUrl); + } catch (Exception e) { + LOG.error("fail to get configure url: " + e.getMessage()); + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, "Missing obConfigUrl"); + } + } + if (StringUtils.isBlank(originalConfig.getString(ConfigKey.DBNAME))) { + originalConfig.set(ConfigKey.DBNAME, serverConnectInfo.databaseName); + } + ConfigValidator.validateParameter(this.originalConfig); + } + + private String queryRsUrl(ServerConnectInfo serverInfo) { + String configUrl = originalConfig.getString(ConfigKey.OBCONFIG_URL, null); + if (configUrl == null) { + try { + Connection conn = null; + int retry = 0; + final String sysJDBCUrl = serverInfo.jdbcUrl.replace(serverInfo.databaseName, "oceanbase"); + do { + try { + if (retry > 0) { + int sleep = retry > 9 ? 
500 : 1 << retry; + try { + TimeUnit.SECONDS.sleep(sleep); + } catch (InterruptedException e) { + } + LOG.warn("retry fetch RsUrl the {} times", retry); + } + conn = DBUtil.getConnection(DataBaseType.OceanBase, sysJDBCUrl, serverInfo.sysUser, serverInfo.sysPass); + String sql = "show parameters like 'obconfig_url'"; + LOG.info("query param: {}", sql); + PreparedStatement stmt = conn.prepareStatement(sql); + ResultSet result = stmt.executeQuery(); + if (result.next()) { + configUrl = result.getString("Value"); + } + if (StringUtils.isNotBlank(configUrl)) { + break; + } + } catch (Exception e) { + ++retry; + LOG.warn("fetch root server list(rsList) error {}", e.getMessage()); + } finally { + DBUtil.closeDBResources(null, conn); + } + } while (retry < 3); + + LOG.info("configure url is: " + configUrl); + originalConfig.set(ConfigKey.OBCONFIG_URL, configUrl); + } catch (Exception e) { + LOG.error("Fail to get configure url: {}", e.getMessage(), e); + throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, "未配置obConfigUrl,且无法获取obConfigUrl"); + } + } + return configUrl; + } + + /** + * 注意:此方法仅执行一次。 最佳实践:如果 Job 中有需要进行数据同步之前的处理,可以在此处完成,如果没有必要则可以直接去掉。 + */ + // 一般来说,是需要推迟到 task 中进行pre 的执行(单表情况例外) + @Override + public void prepare() { + } + + /** + * 注意:此方法仅执行一次。 最佳实践:通常采用工具静态类完成把 Job 配置切分成多个 Task 配置的工作。 这里的 + * mandatoryNumber 是强制必须切分的份数。 + */ + @Override + public List split(int mandatoryNumber) { + // This function does not need any change. 
+ Configuration simplifiedConf = this.originalConfig; + + List splitResultConfigs = new ArrayList(); + for (int j = 0; j < mandatoryNumber; j++) { + splitResultConfigs.add(simplifiedConf.clone()); + } + return splitResultConfigs; + } + + /** + * 注意:此方法仅执行一次。 最佳实践:如果 Job 中有需要进行数据同步之后的后续处理,可以在此处完成。 + */ + @Override + public void post() { + // No post supported + } + + /** + * 注意:此方法仅执行一次。 最佳实践:通常配合 Job 中的 post() 方法一起完成 Job 的资源释放。 + */ + @Override + public void destroy() { + + } + } + + public static class Task extends Writer.Task { + private Configuration taskConfig; + private CommonRdbmsWriter.Task writerTask; + + /** + * 注意:此方法每个 Task 都会执行一次。 最佳实践:此处通过对 taskConfig 配置的读取,进而初始化一些资源为 + * startWrite()做准备。 + */ + @Override + public void init() { + this.taskConfig = super.getPluginJobConf(); + String mode = this.taskConfig.getString(ConfigKey.MODE); + ModeType modeType = ModeType.getByTypeName(mode); + + switch (modeType) { + case Normal: + try { + this.writerTask = new ObHBaseWriteTask(this.taskConfig); + } catch (Exception e) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.INIT_ERROR, "ObHbase writer init error:" + e.getMessage()); + } + break; + default: + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, "ObHbase not support this mode type:" + modeType); + } + } + + /** + * 注意:此方法每个 Task 都会执行一次。 最佳实践:如果 Task + * 中有需要进行数据同步之前的处理,可以在此处完成,如果没有必要则可以直接去掉。 + */ + @Override + public void prepare() { + this.writerTask.prepare(taskConfig); + } + + /** + * 注意:此方法每个 Task 都会执行一次。 最佳实践:此处适当封装确保简洁清晰完成数据写入工作。 + */ + public void startWrite(RecordReceiver recordReceiver) { + this.writerTask.startWrite(recordReceiver, taskConfig, super.getTaskPluginCollector()); + } + + /** + * 注意:此方法每个 Task 都会执行一次。 最佳实践:如果 Task 中有需要进行数据同步之后的后续处理,可以在此处完成。 + */ + @Override + public void post() { + this.writerTask.post(taskConfig); + } + + /** + * 注意:此方法每个 Task 都会执行一次。 最佳实践:通常配合Task 中的 post() 方法一起完成 Task 的资源释放。 + */ + @Override + public void destroy() { + 
this.writerTask.destroy(taskConfig); + } + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings.properties new file mode 100644 index 0000000000..63a53efab7 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_en_US.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_en_US.properties new file mode 100644 index 0000000000..86b6a8b3b8 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_en_US.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=The [table] calculated based on the rules does not exist. The calculated [tableName]={0}, [db]={1}. Please check the rules you configured. diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_ja_JP.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..63a53efab7 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_ja_JP.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. 
\ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_CN.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..63a53efab7 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_CN.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_HK.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..4940a177b6 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_HK.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=通過規則計算出來的table不存在, 算出的tableName={0},db={1}, 請檢查您配置的規則.
diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_TW.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..4940a177b6 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/LocalStrings_zh_TW.properties @@ -0,0 +1 @@ +databasewriterbuffer.1=通過規則計算出來的table不存在, 算出的tableName={0},db={1}, 請檢查您配置的規則. diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObDataSourceErrorCode.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObDataSourceErrorCode.java new file mode 100644 index 0000000000..5e656a6e41 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObDataSourceErrorCode.java @@ -0,0 +1,30 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.ext; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum ObDataSourceErrorCode implements ErrorCode { + DESC("ObDataSourceError code", "connect error"); + + private final String code; + private final String describe; + + private ObDataSourceErrorCode(String code, String describe) { + this.code = code; + this.describe = describe; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.describe; + } + + @Override + public String toString() { + return String.format("Code:[%s], Describe:[%s]. 
", this.code, this.describe); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObHbaseTableHolder.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObHbaseTableHolder.java new file mode 100644 index 0000000000..7b74b9479c --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ObHbaseTableHolder.java @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to + * the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more + * details. + */ +package com.alibaba.datax.plugin.writer.obhbasewriter.ext; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode; +import com.alipay.oceanbase.hbase.OHTable; +import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @author cjyyz + * @date 2023/03/16 + * @since + */ +public class ObHbaseTableHolder { + private static final Logger LOG = LoggerFactory.getLogger(ObHbaseTableHolder.class); + + private Configuration configuration; + + private String hbaseTableName; + + private OHTable ohTable; + + public ObHbaseTableHolder(Configuration configuration, String hbaseTableName) { + this.configuration = configuration; + this.hbaseTableName = hbaseTableName; + } + + public OHTable getOhTable() { + try { + if (ohTable == null) { + ohTable = new OHTable(configuration, hbaseTableName); + } + return ohTable; + } catch (Exception e) { + LOG.error("build obHTable: {} failed. 
reason: {}", hbaseTableName, e.getMessage()); + throw DataXException.asDataXException(Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR, Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR.getDescription()); + } + } + + public void destroy() { + try { + if (ohTable != null) { + ohTable.close(); + } + } catch (Exception e) { + LOG.warn("error in closing htable: {}. Reason: {}", hbaseTableName, e.getMessage()); + } + } +} \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ServerConnectInfo.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ServerConnectInfo.java new file mode 100644 index 0000000000..80c2d0d9f7 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/ext/ServerConnectInfo.java @@ -0,0 +1,146 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.ext; + +import com.google.common.base.Preconditions; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import static org.apache.commons.lang3.StringUtils.EMPTY; + +public class ServerConnectInfo { + + public String clusterName; + public String tenantName; + // userName doesn't contain tenantName or clusterName + public String userName; + public String password; + public String databaseName; + public String ipPort; + public String jdbcUrl; + public String host; + public String port; + public boolean publicCloud; + public int rpcPort; + public String sysUser; + public String sysPass; + + /** + * + * @param jdbcUrl format is jdbc:oceanbase//ip:port + * @param username format is cluster:tenant:username or username@tenant#cluster or user@tenant or user + * @param password + */ + public ServerConnectInfo(final String jdbcUrl, final String username, final String password) { + this(jdbcUrl, username, password, null, null); + } + + public ServerConnectInfo(final String jdbcUrl, final String username, final String password, final String sysUser, final String sysPass) { + 
if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) { + String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN); + Preconditions.checkArgument(ss.length == 3, "jdbc url format is not correct:" + jdbcUrl); + this.userName = username; + this.clusterName = ss[1].trim().split(":")[0]; + this.tenantName = ss[1].trim().split(":")[1]; + this.jdbcUrl = ss[2]; + } else { + this.jdbcUrl = jdbcUrl; + } + this.password = password; + this.sysUser = sysUser; + this.sysPass = sysPass; + parseJdbcUrl(jdbcUrl); + parseFullUserName(username); + } + + private void parseJdbcUrl(final String jdbcUrl) { + Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w-]+)\\?"); + Matcher matcher = pattern.matcher(jdbcUrl); + if (matcher.find()) { + String ipPort = matcher.group(1); + String dbName = matcher.group(2); + this.ipPort = ipPort; + String[] hostPort = ipPort.split(":"); + this.host = hostPort[0]; + this.port = hostPort[1]; + this.databaseName = dbName; + this.publicCloud = host.endsWith("aliyuncs.com"); + } else { + throw new RuntimeException("Invalid argument:" + jdbcUrl); + } + } + + private void parseFullUserName(final String fullUserName) { + int tenantIndex = fullUserName.indexOf("@"); + int clusterIndex = fullUserName.indexOf("#"); + // 适用于jdbcUrl以||_dsc_ob10_dsc_开头的场景 + if (fullUserName.contains(":") && tenantIndex < 0) { + String[] names = fullUserName.split(":"); + if (names.length != 3) { + throw new RuntimeException("invalid argument: " + fullUserName); + } else { + this.clusterName = names[0]; + this.tenantName = names[1]; + this.userName = names[2]; + } + } else if (tenantIndex < 0) { + // 适用于short jdbcUrl,且username中不含租户名(主要是公有云场景,此场景下不计算分区) + this.userName = fullUserName; + this.clusterName = EMPTY; + this.tenantName = EMPTY; + } else { + // 适用于short jdbcUrl,且username中含租户名 + this.userName = fullUserName.substring(0, tenantIndex); + if (clusterIndex < 0) { + this.clusterName = 
EMPTY; + this.tenantName = fullUserName.substring(tenantIndex + 1); + } else { + this.clusterName = fullUserName.substring(clusterIndex + 1); + this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex); + } + } + } + + @Override + public String toString() { + return "ServerConnectInfo{" + + "clusterName='" + clusterName + '\'' + + ", tenantName='" + tenantName + '\'' + + ", userName='" + userName + '\'' + + ", password='" + password + '\'' + + ", databaseName='" + databaseName + '\'' + + ", ipPort='" + ipPort + '\'' + + ", jdbcUrl='" + jdbcUrl + '\'' + + ", publicCloud=" + publicCloud + + ", rpcPort=" + rpcPort + + '}'; + } + + public String getFullUserName() { + StringBuilder builder = new StringBuilder(); + builder.append(userName); + if (publicCloud || (rpcPort != 0 && EMPTY.equals(clusterName))) { + return builder.toString(); + } + if (!EMPTY.equals(tenantName)) { + builder.append("@").append(tenantName); + } + + if (!EMPTY.equals(clusterName)) { + builder.append("#").append(clusterName); + } + if (EMPTY.equals(this.clusterName) && EMPTY.equals(this.tenantName)) { + return this.userName; + } + return builder.toString(); + } + + public void setRpcPort(int rpcPort) { + this.rpcPort = rpcPort; + } + + public void setSysUser(String sysUser) { + this.sysUser = sysUser; + } + + public void setSysPass(String sysPass) { + this.sysPass = sysPass; + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings.properties new file mode 100644 index 0000000000..d41f6151e9 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings.properties @@ -0,0 +1,21 @@ +multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e 
+multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301 +multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219. 
+multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0},ErrorCode:{1} +multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0},ErrorCode:{1} +multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1} +multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1} +multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2} +multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a: + + +singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0},ErrorCode:{1} +singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0},ErrorCode:{1} +singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1} +singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1} \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_en_US.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_en_US.properties new file mode 100644 index 0000000000..010db531ac --- /dev/null +++ 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_en_US.properties @@ -0,0 +1,21 @@ +multitablewritertask.1=The configured [tableList] contains multiple tables but no table splitting rules have been configured. Please check your configuration. +multitablewritertask.2=There are repeated table names in the multiple tables you configured, but no database or table splitting rules have been configured. Please check your configuration. +multitablewritertask.3=All configured tables share the same name, but no database splitting rules have been configured. Please check your configuration. +multitablewritertask.4=The configured table and database share the same name. This back-to-source method is not supported. +multitablewritertask.5=Error in column configuration information. In your configured tasks, the number of source fields to be read: {0} and the number of fields to be written to the target table: {1} are not equivalent. Please check your configuration and make corrections. +multitablewritertask.6=The database that corresponds to the [tableName] calculated based on the rules does not exist. The [tableName]={0}. Please check the rules you configured. +multitablewritertask.7=The database and [table] calculated based on the rules do not exist. The calculated [dbName]={0}, and [tableName]={1}. Please check the rules you configured. +multitablewritertask.8=The database calculated based on the rules does not exist. The calculated [dbName]={0}. Please check the rules you configured. +multitablewritertask.9=The [dbName] [{0}] calculated based on the rules contains multiple sub-tables. Please configure your table splitting rules. +multitablewritertask.10=Fatal exception in OB. Roll back this write and hibernate for five minutes. SQLState: {0}. ErrorCode: {1} +multitablewritertask.11=Recoverable exception in OB. Roll back this write and hibernate for one minute. SQLState: {0}. ErrorCode: {1} +multitablewritertask.12=Exception in OB. 
Roll back this write and hibernate for one second. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1} +multitablewritertask.13=Exception in OB. Roll back this write. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1} +multitablewritertask.14=Failed to write to table: [{0}]. Hibernate for [{1}] milliseconds. Data: {2} +multitablewritertask.15=writing table [{0}] contains dirty data. Record={1}. Writing exception is: + + +singletablewritertask.1=Fatal exception in OB. Roll back this write and hibernate for five minutes. SQLState: {0}. ErrorCode: {1} +singletablewritertask.2=Recoverable exception in OB. Roll back this write and hibernate for one minute. SQLState: {0}. ErrorCode: {1} +singletablewritertask.3=Exception in OB. Roll back this write and hibernate for one second. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1} +singletablewritertask.4=Exception in OB. Roll back this write. Write and submit the records one by one. SQLState: {0}. 
ErrorCode: {1} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_ja_JP.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..12e3e481f8 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_ja_JP.properties @@ -0,0 +1,21 @@ +multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301 +multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. 
+multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219. +multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2} +multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a: + + +singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_CN.properties 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..12e3e481f8 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_CN.properties @@ -0,0 +1,21 @@ +multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301 +multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. 
+multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219. +multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2} +multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a: + + +singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} \ No newline at end of file diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_HK.properties 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..4c0f8e55bc --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_HK.properties @@ -0,0 +1,41 @@ +multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301 +multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. 
+multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219. +multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2} +multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a: + + +singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.1=配置的tableList為多表,但未配置分表規則,請檢查您的配置 +multitablewritertask.2=配置的多庫中的表名有重複的,但未配置分庫規則和分表規則,請檢查您的配置
+multitablewritertask.3=配置的所有表名都相同,但未配置分庫規則,請檢查您的配置 +multitablewritertask.4=配置的table和db名稱都相同,此種回流方式不支援 +multitablewritertask.5=列配置資訊有錯誤. 因為您配置的任務中,源頭讀取欄位數:{0}與 目的表要寫入的欄位數:{1}不相等. 請檢查您的配置並作出修改. +multitablewritertask.6=通過規則計算出來的tableName查找對應的db不存在,tableName={0}, 請檢查您配置的規則. +multitablewritertask.7=通過規則計算出來的db和table不存在,算出的dbName={0},tableName={1}, 請檢查您配置的規則. +multitablewritertask.8=通過規則計算出來的db不存在,算出的dbName={0}, 請檢查您配置的規則. +multitablewritertask.9=通過規則計算出來的dbName[{0}], 存在多張分表,請配置您的分表規則. +multitablewritertask.10=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0} +multitablewritertask.11=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0} +multitablewritertask.12=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0} +multitablewritertask.13=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0} +multitablewritertask.14=寫入表[{0}]失敗,休眠[{1}]毫秒,數據:{2} +multitablewritertask.15=寫入表[{0}]存在髒數據,record={1}, 寫入異常為: + + +singletablewritertask.1=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0} +singletablewritertask.2=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0} +singletablewritertask.3=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0} +singletablewritertask.4=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_TW.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..4c0f8e55bc --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/LocalStrings_zh_TW.properties @@ -0,0 +1,41 @@ +multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e 
+multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e +multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301 +multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219. +multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219. 
+multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2} +multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a: + + +singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0} +singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0} +singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0} +singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}multitablewritertask.1=配置的tableList為多表,但未配置分表規則,請檢查您的配置 +multitablewritertask.2=配置的多庫中的表名有重複的,但未配置分庫規則和分表規則,請檢查您的配置 +multitablewritertask.3=配置的所有表名都相同,但未配置分庫規則,請檢查您的配置 +multitablewritertask.4=配置的table和db名稱都相同,此種回流方式不支援 +multitablewritertask.5=列配置資訊有錯誤. 因為您配置的任務中,源頭讀取欄位數:{0}與 目的表要寫入的欄位數:{1}不相等. 請檢查您的配置並作出修改. +multitablewritertask.6=通過規則計算出來的tableName查找對應的db不存在,tableName={0}, 請檢查您配置的規則. +multitablewritertask.7=通過規則計算出來的db和table不存在,算出的dbName={0},tableName={1}, 請檢查您配置的規則. 
+multitablewritertask.8=通過規則計算出來的db不存在,算出的dbName={0}, 請檢查您配置的規則. +multitablewritertask.9=通過規則計算出來的dbName[{0}], 存在多張分表,請配置您的分表規則. +multitablewritertask.10=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0} +multitablewritertask.11=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0} +multitablewritertask.12=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0} +multitablewritertask.13=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0} +multitablewritertask.14=寫入表[{0}]失敗,休眠[{1}]毫秒,數據:{2} +multitablewritertask.15=寫入表[{0}]存在髒數據,record={1}, 寫入異常為: + + +singletablewritertask.1=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0} +singletablewritertask.2=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0} +singletablewritertask.3=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0} +singletablewritertask.4=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/MultiVersionWriteTask.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/MultiVersionWriteTask.java new file mode 100644 index 0000000000..4e400b060c --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/MultiVersionWriteTask.java @@ -0,0 +1,12 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.task; + +import com.alibaba.datax.common.util.Configuration; + +/** + * TODO(yuez)升级hbase api之后再补充暂时用不到 + */ +public class MultiVersionWriteTask extends ObHBaseWriteTask{ + public MultiVersionWriteTask(Configuration configuration) throws Exception { + super(configuration); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/NormalWriteTask.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/NormalWriteTask.java new file mode 100644 index 0000000000..3113c022ce --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/NormalWriteTask.java @@ -0,0 +1,12 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.task; + +import 
com.alibaba.datax.common.util.Configuration; + +/** + * TODO(yuez) 升级hbase api之后再补充暂时用不到 + */ +public class NormalWriteTask extends ObHBaseWriteTask{ + public NormalWriteTask(Configuration configuration) throws Exception { + super(configuration); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/ObHBaseWriteTask.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/ObHBaseWriteTask.java new file mode 100644 index 0000000000..d424f6eac2 --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/ObHBaseWriteTask.java @@ -0,0 +1,317 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.task; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.rdbms.reader.Key; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter; +import com.alibaba.datax.plugin.writer.obhbasewriter.Config; +import com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey; +import com.alibaba.datax.plugin.writer.obhbasewriter.Constant; +import com.alibaba.datax.plugin.writer.obhbasewriter.NullModeType; +import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo; +import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo; +import com.google.common.collect.Lists; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import 
java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ObHBaseWriteTask extends CommonRdbmsWriter.Task { + private final static MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ObHBaseWriteTask.class); + private final static Logger LOG = LoggerFactory.getLogger(ObHBaseWriteTask.class); + + public NullModeType nullMode = null; + private int maxRetryCount; + + public List columns; + public List rowkeyColumn; + public Configuration versionColumn; + + public String hbaseTableName; + public String encoding; + public Boolean walFlag; + + String configUrl; + String dbName; + String ip; + String port; + + String fullUserName; + boolean usdOdpMode; + String sysUsername; + String sysPassword; + private ObHTableInfo obHTableInfo; + + private ConcurrentTableWriter concurrentWriter; + private boolean allTaskInQueue = false; + private long startTime = 0; + private String threadName = Thread.currentThread().getName(); + + private Lock lock = new ReentrantLock(); + private Condition condition = lock.newCondition(); + + public ObHBaseWriteTask(Configuration configuration) { + super(DataBaseType.MySql); + init(configuration); + } + + @Override + public void init(com.alibaba.datax.common.util.Configuration configuration) { + this.obHTableInfo = new ObHTableInfo(configuration); + this.hbaseTableName = configuration.getString(ConfigKey.TABLE); + this.columns = configuration.getListConfiguration(ConfigKey.COLUMN); + this.rowkeyColumn = configuration.getListConfiguration(ConfigKey.ROWKEY_COLUMN); + this.versionColumn = configuration.getConfiguration(ConfigKey.VERSION_COLUMN); + this.encoding = configuration.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING); + this.nullMode = 
NullModeType.getByTypeName(configuration.getString(ConfigKey.NULL_MODE, Constant.DEFAULT_NULL_MODE)); +// this.memstoreThreshold = configuration.getDouble(Config.MEMSTORE_THRESHOLD, Config.DEFAULT_MEMSTORE_THRESHOLD); + this.walFlag = configuration.getBool(ConfigKey.WAL_FLAG, true); + this.maxRetryCount = configuration.getInt(ConfigKey.MAX_RETRY_COUNT, 3); + + // default 1000 rows are committed together + this.batchSize = com.alibaba.datax.plugin.rdbms.writer.Constant.DEFAULT_BATCH_SIZE; + this.batchByteSize = com.alibaba.datax.plugin.rdbms.writer.Constant.DEFAULT_BATCH_BYTE_SIZE; + + this.configUrl = configuration.getString(ConfigKey.OBCONFIG_URL); + this.jdbcUrl = configuration.getString(ConfigKey.JDBC_URL); + this.username = configuration.getString(Key.USERNAME); + this.password = configuration.getString(Key.PASSWORD); + this.dbName = configuration.getString(Key.DBNAME); + this.usdOdpMode = configuration.getBool(ConfigKey.USE_ODP_MODE); + + ServerConnectInfo connectInfo = new ServerConnectInfo(jdbcUrl, username, password); + String clusterName = connectInfo.clusterName; + this.fullUserName = connectInfo.getFullUserName(); + final String[] ipPort = connectInfo.ipPort.split(":"); + if (usdOdpMode) { + this.ip = ipPort[0]; + this.port = ipPort[1]; + } else { + this.sysUsername = configuration.getString(ConfigKey.OB_SYS_USER); + this.sysPassword = configuration.getString(ConfigKey.OB_SYS_PASSWORD); + connectInfo.setSysUser(sysUsername); + connectInfo.setSysPass(sysPassword); + if (!configUrl.contains("ObRegion")) { + if (configUrl.contains("?")) { + configUrl += "&ObRegion=" + clusterName; + } else { + configUrl += "?ObRegion=" + clusterName; + } + } + if (!configUrl.contains("database")) { + configUrl += "&database=" + dbName; + } + } + if (null == concurrentWriter) { + concurrentWriter = new ConcurrentTableWriter(configuration, connectInfo); + allTaskInQueue = false; + } + } + + @Override + public void prepare(Configuration configuration) { + 
concurrentWriter.start(); + } + + @Override + public void startWrite(RecordReceiver recordReceiver, Configuration configuration, TaskPluginCollector taskPluginCollector) { + this.taskPluginCollector = taskPluginCollector; + int recordCount = 0; + int bufferBytes = 0; + List records = new ArrayList<>(); + try { + Record record; + while ((record = recordReceiver.getFromReader()) != null) { + recordCount++; + bufferBytes += record.getMemorySize(); + records.add(record); + // 按照指定的批大小进行批量写入 + if (records.size() >= batchSize || bufferBytes >= batchByteSize) { + concurrentWriter.addBatchRecords(Lists.newArrayList(records)); + records.clear(); + bufferBytes = 0; + } + } + + if (!records.isEmpty()) { + concurrentWriter.addBatchRecords(records); + } + } catch (Throwable e) { + LOG.warn("startWrite error unexpected ", e); + throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + LOG.info(recordCount + " rows received."); + waitTaskFinish(); + } + + public void waitTaskFinish() { + this.allTaskInQueue = true; + LOG.info("ConcurrentTableWriter has put all task in queue, queueSize = {}, total = {}, finished = {}", + concurrentWriter.getTaskQueueSize(), + concurrentWriter.getTotalTaskCount(), + concurrentWriter.getFinishTaskCount()); + + lock.lock(); + try { + while (!concurrentWriter.checkFinish()) { + condition.await(50, TimeUnit.MILLISECONDS); + // print statistic + LOG.debug("Statistic total task {}, finished {}, queue Size {}", + concurrentWriter.getTotalTaskCount(), + concurrentWriter.getFinishTaskCount(), + concurrentWriter.getTaskQueueSize()); + concurrentWriter.printStatistics(); + } + } catch (InterruptedException e) { + LOG.warn("Concurrent table writer wait task finish interrupt"); + } finally { + lock.unlock(); + } + LOG.debug("wait all InsertTask finished ..."); + } + + public boolean isFinished() { + return allTaskInQueue && concurrentWriter.checkFinish(); + } + + public void singalTaskFinish() { + lock.lock(); + try { + 
condition.signal(); + } finally { + lock.unlock(); + } + } + + public void collectDirtyRecord(Record record, Throwable throwable) { + this.taskPluginCollector.collectDirtyRecord(record, throwable); + } + + @Override + public void post(Configuration configuration) { + + } + + @Override + public void destroy(Configuration configuration) { + if (concurrentWriter != null) { + concurrentWriter.destory(); + } + super.destroy(configuration); + } + + public class ConcurrentTableWriter { + private BlockingQueue> queue; + private List putTasks; + private Configuration config; + private AtomicLong totalTaskCount; + private AtomicLong finishTaskCount; + private ServerConnectInfo connectInfo; + private ExecutorService executorService; + private final int threadCount; + + public ConcurrentTableWriter(Configuration config, ServerConnectInfo connectInfo) { + this.threadCount = config.getInt(Config.WRITER_THREAD_COUNT, Config.DEFAULT_WRITER_THREAD_COUNT); + this.queue = new LinkedBlockingQueue>(threadCount << 1); + this.putTasks = new ArrayList(threadCount); + this.config = config; + this.totalTaskCount = new AtomicLong(0); + this.finishTaskCount = new AtomicLong(0); + this.executorService = Executors.newFixedThreadPool(threadCount); + this.connectInfo = connectInfo; + } + + public long getTotalTaskCount() { + return totalTaskCount.get(); + } + + public long getFinishTaskCount() { + return finishTaskCount.get(); + } + + public int getTaskQueueSize() { + return queue.size(); + } + + public void increFinishCount() { + finishTaskCount.incrementAndGet(); + } + + // should check after put all the task in the queue + public boolean checkFinish() { + long finishCount = finishTaskCount.get(); + long totalCount = totalTaskCount.get(); + return finishCount == totalCount; + } + + public synchronized void start() { + for (int i = 0; i < threadCount; ++i) { + LOG.info("start {} insert task.", (i + 1)); + PutTask putTask = new PutTask(threadName, queue, config, connectInfo, obHTableInfo, 
ObHBaseWriteTask.this); + putTask.setWriter(this); + putTasks.add(putTask); + } + for (PutTask task : putTasks) { + executorService.execute(task); + } + } + + public void printStatistics() { + long insertTotalCost = 0; + long insertTotalCount = 0; + for (PutTask task : putTasks) { + insertTotalCost += task.getTotalCost(); + insertTotalCount += task.getPutCount(); + } + long avgCost = 0; + if (insertTotalCount != 0) { + avgCost = insertTotalCost / insertTotalCount; + } + ObHBaseWriteTask.LOG.debug("Put {} times, totalCost {} ms, average {} ms", + insertTotalCount, insertTotalCost, avgCost); + } + + public void addBatchRecords(final List records) throws InterruptedException { + boolean isSucc = false; + while (!isSucc) { + isSucc = queue.offer(records, 5, TimeUnit.MILLISECONDS); + } + totalTaskCount.incrementAndGet(); + } + + public synchronized void destory() { + if (putTasks != null) { + for (PutTask task : putTasks) { + task.setStop(); + task.destroy(); + } + } + destroyExecutor(); + } + + private void destroyExecutor() { + if (executorService != null && !executorService.isShutdown()) { + executorService.shutdown(); + try { + executorService.awaitTermination(0L, TimeUnit.SECONDS); + } catch (InterruptedException var2) { + } + } + } + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/PutTask.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/PutTask.java new file mode 100644 index 0000000000..768772c0ab --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/task/PutTask.java @@ -0,0 +1,325 @@ +package com.alibaba.datax.plugin.writer.obhbasewriter.task; + +import com.alibaba.datax.common.element.DoubleColumn; +import com.alibaba.datax.common.element.LongColumn; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import 
com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.writer.obhbasewriter.ColumnType; +import com.alibaba.datax.plugin.writer.obhbasewriter.Config; +import com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey; +import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode; +import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo; +import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ObHbaseTableHolder; +import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo; +import com.alipay.oceanbase.hbase.constants.OHConstants; +import com.alipay.oceanbase.rpc.property.Property; + +import com.google.common.base.Stopwatch; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Queue; +import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.tuple.Triple; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hbase.client.Put; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.OBHBASE_HTABLE_CLIENT_WRITE_BUFFER; +import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK; +import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.TABLE_CLIENT_RPC_EXECUTE_TIMEOUT; +import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.WRITE_BUFFER_HIGH_MARK; +import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.WRITE_BUFFER_LOW_MARK; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK; +import static 
com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_NETTY_BUFFER_HIGH_WATERMARK; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_NETTY_BUFFER_LOW_WATERMARK; +import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_RPC_EXECUTE_TIMEOUT; +import static com.alibaba.datax.plugin.writer.obhbasewriter.util.ObHbaseWriterUtils.getColumnByte; +import static com.alibaba.datax.plugin.writer.obhbasewriter.util.ObHbaseWriterUtils.getRowkey; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_HTABLE_CLIENT_WRITE_BUFFER; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_DATABASE; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_FULL_USER_NAME; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PARAM_URL; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PASSWORD; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_USER_NAME; +import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_PASSWORD; + +public class PutTask implements Runnable { + + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(PutTask.class); + + private static final Logger LOG = LoggerFactory.getLogger(PutTask.class); + + private ObHBaseWriteTask writerTask; + private ObHBaseWriteTask.ConcurrentTableWriter writer; + + private long totalCost = 0; + private long putCount = 0; + private boolean isStop; + + private ObHTableInfo obHTableInfo; + private final Configuration versionColumn; + // 失败重试次数 + private final int failTryCount; + + private String parentThreadName; + private Queue> queue; + private Configuration config; + private ServerConnectInfo connInfo; + + private ObHbaseTableHolder tableHolder; + + private final 
SimpleDateFormat df_second = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + private final SimpleDateFormat df_ms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS"); + + public PutTask(String parentThreadName, Queue> recordsQueue, Configuration config, ServerConnectInfo connectInfo, ObHTableInfo obHTableInfo, ObHBaseWriteTask writerTask) { + this.parentThreadName = parentThreadName; + this.queue = recordsQueue; + this.config = config; + this.connInfo = connectInfo; + this.obHTableInfo = obHTableInfo; + this.writerTask = writerTask; + this.versionColumn = config.getConfiguration(ConfigKey.VERSION_COLUMN); + this.failTryCount = config.getInt(Config.FAIL_TRY_COUNT, Config.DEFAULT_FAIL_TRY_COUNT); + this.isStop = false; + initTableHolder(); + } + + private void initTableHolder() { + try { + org.apache.hadoop.conf.Configuration c = new org.apache.hadoop.conf.Configuration(); + c.set(HBASE_OCEANBASE_FULL_USER_NAME, writerTask.fullUserName); + c.set(HBASE_OCEANBASE_PASSWORD, this.connInfo.password); + c.set(HBASE_OCEANBASE_DATABASE, writerTask.dbName); + // obkv-table-client is needed the code below + if (writerTask.usdOdpMode) { + c.setBoolean(OHConstants.HBASE_OCEANBASE_ODP_MODE, true); + c.set(OHConstants.HBASE_OCEANBASE_ODP_ADDR, connInfo.host); + c.set(OHConstants.HBASE_OCEANBASE_ODP_PORT, connInfo.port); + LOG.info("sysUser and sysPassword is empty, build HTABLE in odp mode."); + } else { + c.set(HBASE_OCEANBASE_PARAM_URL, writerTask.configUrl); + c.set(HBASE_OCEANBASE_SYS_USER_NAME, this.connInfo.sysUser); + c.set(HBASE_OCEANBASE_SYS_PASSWORD, this.connInfo.sysPass); + LOG.info("sysUser and sysPassword is not empty, build HTABLE in sys mode."); + } + c.set(HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK, config.getString(OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK, DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK)); + c.set(HBASE_HTABLE_CLIENT_WRITE_BUFFER, config.getString(OBHBASE_HTABLE_CLIENT_WRITE_BUFFER, DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER)); + + 
c.set(Property.RS_LIST_ACQUIRE_CONNECT_TIMEOUT.getKey(), "500"); + c.set(Property.RS_LIST_ACQUIRE_READ_TIMEOUT.getKey(), "5000"); + c.set(Property.RPC_EXECUTE_TIMEOUT.getKey(), config.getString(TABLE_CLIENT_RPC_EXECUTE_TIMEOUT, DEFAULT_RPC_EXECUTE_TIMEOUT)); + c.set(Property.NETTY_BUFFER_LOW_WATERMARK.getKey(), config.getString(WRITE_BUFFER_LOW_MARK, DEFAULT_NETTY_BUFFER_LOW_WATERMARK)); + c.set(Property.NETTY_BUFFER_HIGH_WATERMARK.getKey(), config.getString(WRITE_BUFFER_HIGH_MARK, DEFAULT_NETTY_BUFFER_HIGH_WATERMARK)); + this.tableHolder = new ObHbaseTableHolder(c, obHTableInfo.getTableName()); + } catch (Exception e) { + LOG.error("init table holder failed, reason: {}", e.getMessage()); + throw new IllegalStateException(e); + } + + } + + private void batchWrite(final List buffer) { + HTableInterface ohTable = null; + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + ohTable = this.tableHolder.getOhTable(); + List puts = buildBatchPutList(buffer); + ohTable.put(puts); + } catch (Exception e) { + if (Objects.isNull(ohTable)) { + LOG.error("build obHTable: {} failed. reason: {}", obHTableInfo.getTableName(), e.getMessage()); + throw DataXException.asDataXException(Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR, Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR.getDescription()); + } + // + LOG.error("hbase batch error: " + e); + // 出错了之后对该出错的batch逐条重试 + for (Record record : buffer) { + writeOneRecord(ohTable, record); + } + } finally { + this.writer.increFinishCount(); + putCount++; + totalCost += stopwatch.elapsed(TimeUnit.MILLISECONDS); + try { + if (!Objects.isNull(ohTable)) { + ohTable.close(); + } + } catch (Exception e) { + LOG.warn("error in closing htable: {}. 
Reason: {}", obHTableInfo.getFullHbaseTableName(), e.getMessage()); + } + } + } + + private void writeOneRecord(HTableInterface ohTable, Record record) { + int retryCount = 0; + while (retryCount < this.failTryCount) { + try { + byte[] rowkey = getRowkey(record, obHTableInfo); + Put put = new Put(rowkey); // row key + boolean hasValidValue = buildPut(put, record); + + if (hasValidValue) { + ohTable.put(put); + } + break; + } catch (Exception e) { + retryCount++; + LOG.error("error in writing: " + e.getMessage() + ", retry count: " + retryCount); + if (retryCount == this.failTryCount) { + LOG.warn("ERROR : record {}", record); + this.writerTask.collectDirtyRecord(record, e); + } + } + } + } + + private List buildBatchPutList(List buffer) { + List puts = new ArrayList<>(); + for (Record record : buffer) { + byte[] rowkey = getRowkey(record, obHTableInfo); + Put put = new org.apache.hadoop.hbase.client.Put(rowkey); // row key + boolean hasValidValue = buildPut(put, record); + if (hasValidValue) { + puts.add(put); + } + } + return puts; + } + + private boolean buildPut(Put put, Record record) { + boolean hasValidValue = false; + long timestamp = buildTimestamp(record); + for (Map.Entry> columnInfo : obHTableInfo.getIndexColumnInfoMap().entrySet()) { + Integer index = columnInfo.getKey(); + if (index >= record.getColumnNumber()) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, + MESSAGE_SOURCE.message("normaltask.2", record.getColumnNumber(), index)); + } + ColumnType columnType = columnInfo.getValue().getRight(); + String familyName = columnInfo.getValue().getLeft(); + String columnName = columnInfo.getValue().getMiddle(); + + byte[] value = getColumnByte(columnType, record.getColumn(index), obHTableInfo); + if (value != null) { + hasValidValue = true; + if (timestamp == -1) { + put.add(familyName.getBytes(), // family + columnName.getBytes(), // Q + value); // V + } else { + put.add(familyName.getBytes(), // family + 
columnName.getBytes(), // Q + timestamp, // timestamp/version + value); // V + } + } + } + + return hasValidValue; + } + + private long buildTimestamp(Record record) { + if (versionColumn == null) { + return -1; + } + + int index = versionColumn.getInt(ConfigKey.INDEX); + long timestamp; + if (index == -1) { + // user specified the constant as timestamp + timestamp = versionColumn.getLong(ConfigKey.VALUE); + if (timestamp < 0) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, + MESSAGE_SOURCE.message("normaltask.4")); + } + } else { + // 指定列作为版本,long/doubleColumn直接record.aslong, 其它类型尝试用yyyy-MM-dd HH:mm:ss, + // yyyy-MM-dd HH:mm:ss SSS去format + if (index >= record.getColumnNumber()) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, + MESSAGE_SOURCE.message("normaltask.5", record.getColumnNumber(), index)); + } + + if (record.getColumn(index).getRawData() == null) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, + MESSAGE_SOURCE.message("normaltask.6")); + } + + if (record.getColumn(index) instanceof LongColumn || record.getColumn(index) instanceof DoubleColumn) { + timestamp = record.getColumn(index).asLong(); + } else { + Date date; + try { + date = df_ms.parse(record.getColumn(index).asString()); + } catch (ParseException e) { + try { + date = df_second.parse(record.getColumn(index).asString()); + } catch (ParseException e1) { + LOG.info(MESSAGE_SOURCE.message("normaltask.7", index)); + throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, e1); + } + } + timestamp = date.getTime(); + } + } + + return timestamp; + } + + public void setStop() {isStop = true;} + + public long getTotalCost() {return totalCost;} + + public long getPutCount() {return putCount;} + + public void destroy() { + tableHolder.destroy(); + } + + void setWriterTask(ObHBaseWriteTask writerTask) { + this.writerTask = writerTask; + } + + 
void setWriter(ObHBaseWriteTask.ConcurrentTableWriter writer) { + this.writer = writer; + } + + @Override + public void run() { + String currentThreadName = String.format("%s-putTask-%d", parentThreadName, Thread.currentThread().getId()); + Thread.currentThread().setName(currentThreadName); + LOG.debug("Task {} start to execute...", currentThreadName); + int sleepTimes = 0; + while (!isStop) { + try { + List records = queue.poll(); + if (null != records) { + batchWrite(records); + } else if (writerTask.isFinished()) { + writerTask.singalTaskFinish(); + LOG.debug("not more task, thread exist ..."); + break; + } else { + TimeUnit.MILLISECONDS.sleep(5); + sleepTimes++; + } + } catch (InterruptedException e) { + LOG.debug("TableWriter is interrupt"); + } catch (Exception e) { + LOG.warn("ERROR UNEXPECTED {}", e); + } + } + LOG.debug("Thread exist..."); + LOG.debug("sleep {} times, total sleep time: {}", sleepTimes, sleepTimes * 5); + } +} diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_en_US.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_en_US.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_ja_JP.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_CN.properties 
b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_HK.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_TW.properties b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..e69de29bb2 diff --git a/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/ObHbaseWriterUtils.java b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/ObHbaseWriterUtils.java new file mode 100644 index 0000000000..9ccd3ed84b --- /dev/null +++ b/obhbasewriter/src/main/java/com/alibaba/datax/plugin/writer/obhbasewriter/util/ObHbaseWriterUtils.java @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to + * the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more + * details. 
+ */ +package com.alibaba.datax.plugin.writer.obhbasewriter.util; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.writer.obhbasewriter.ColumnType; +import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode; +import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo; +import com.alibaba.datax.plugin.writer.obhbasewriter.task.PutTask; +import java.nio.charset.Charset; +import org.apache.commons.lang3.tuple.Triple; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * @author cjyyz + * @date 2023/03/23 + * @since + */ +public class ObHbaseWriterUtils { + + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(PutTask.class); + + public static byte[] getRowkey(Record record, ObHTableInfo obHTableInfo) { + byte[] rowkeyBuffer = {}; + for (Triple rowKeyElement : obHTableInfo.getRowKeyElementList()) { + Integer index = rowKeyElement.getLeft(); + ColumnType columnType = rowKeyElement.getRight(); + if (index == -1) { + String value = rowKeyElement.getMiddle(); + rowkeyBuffer = Bytes.add(rowkeyBuffer, getValueByte(columnType, value, obHTableInfo.getEncoding())); + } else { + if (index >= record.getColumnNumber()) { + throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_ROWKEY_ERROR, MESSAGE_SOURCE.message("normaltask.3", record.getColumnNumber(), index)); + } + byte[] value = getColumnByte(columnType, record.getColumn(index), obHTableInfo); + rowkeyBuffer = Bytes.add(rowkeyBuffer, value); + } + } + + return rowkeyBuffer; + } + + public static byte[] getColumnByte(ColumnType columnType, Column column, ObHTableInfo obHTableInfo) { + byte[] bytes; + if (column.getRawData() != null && !(columnType == ColumnType.STRING && column.asString().equals("null"))) { + 
switch (columnType) { + case INT: + bytes = Bytes.toBytes(column.asLong().intValue()); + break; + case LONG: + bytes = Bytes.toBytes(column.asLong()); + break; + case DOUBLE: + bytes = Bytes.toBytes(column.asDouble()); + break; + case FLOAT: + bytes = Bytes.toBytes(column.asDouble().floatValue()); + break; + case SHORT: + bytes = Bytes.toBytes(column.asLong().shortValue()); + break; + case BOOLEAN: + bytes = Bytes.toBytes(column.asBoolean()); + break; + case STRING: + bytes = getValueByte(columnType, column.asString(), obHTableInfo.getEncoding()); + break; + case BINARY: + bytes = Bytes.toBytesBinary(column.asString()); + break; + default: + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.2", columnType)); + } + } else { + switch (obHTableInfo.getNullModeType()) { + case Skip: + bytes = null; + break; + case Empty: + bytes = HConstants.EMPTY_BYTE_ARRAY; + break; + default: + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.3")); + } + } + return bytes; + } + + /** + * @param columnType + * @param value + * @return byte[] + */ + private static byte[] getValueByte(ColumnType columnType, String value, String encoding) { + byte[] bytes; + if (value != null) { + switch (columnType) { + case INT: + bytes = Bytes.toBytes(Integer.parseInt(value)); + break; + case LONG: + bytes = Bytes.toBytes(Long.parseLong(value)); + break; + case DOUBLE: + bytes = Bytes.toBytes(Double.parseDouble(value)); + break; + case FLOAT: + bytes = Bytes.toBytes(Float.parseFloat(value)); + break; + case SHORT: + bytes = Bytes.toBytes(Short.parseShort(value)); + break; + case BOOLEAN: + bytes = Bytes.toBytes(Boolean.parseBoolean(value)); + break; + case STRING: + bytes = value.getBytes(Charset.forName(encoding)); + break; + default: + throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.4", 
columnType)); + } + } else { + bytes = HConstants.EMPTY_BYTE_ARRAY; + } + return bytes; + } +} \ No newline at end of file diff --git a/obhbasewriter/src/main/resources/plugin.json b/obhbasewriter/src/main/resources/plugin.json new file mode 100644 index 0000000000..6ea96196e4 --- /dev/null +++ b/obhbasewriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "obhbasewriter", + "class": "com.alibaba.datax.plugin.writer.obhbasewriter.ObHbaseWriter", + "description": "适用于: 生产环境. 原理: TODO", + "developer": "alibaba" +} diff --git a/oceanbasev10reader/doc/oceanbasev10reader.md b/oceanbasev10reader/doc/oceanbasev10reader.md new file mode 100644 index 0000000000..3d1672b5c2 --- /dev/null +++ b/oceanbasev10reader/doc/oceanbasev10reader.md @@ -0,0 +1,244 @@ +## 1 快速介绍 +OceanbaseV10Reader插件实现了从Oceanbase V1.0读取数据。在底层实现上,该读取插件通过java client(jdbc)连接远程Oceanbase 1.0数据库,并执行相应的sql语句将数据从库中SELECT出来。 + +注意,oceanbasev10reader只适用于ob1.0及以后版本的reader。 + +## 2 实现原理 +简而言之,Oceanbasev10Reader通过java client连接器连接到远程的Oceanbase数据库,并根据用户配置的信息生成查询SELECT SQL语句,然后发送到远程Oceanbase v1.0及更高版本数据库,并将该SQL执行返回结果使用DataX自定义的数据类型拼装为抽象的数据集,并传递给下游Writer处理。对于用户配置Table、Column、Where的信息,OceanbaseV10Reader将其拼接为SQL语句发送到Oceanbase 数据库;对于用户配置querySql信息,Oceanbasev10Reader直接将其发送到Oceanbase数据库。 +## 3 功能说明 +### 3.1 配置样例 + +- 配置一个从Oceanbase数据库同步抽取数据到本地的作业: +``` +{ + "job": { + "setting": { + "speed": { + //设置传输速度,单位为byte/s,DataX运行会尽可能达到该速度但是不超过它. 
+ "byte": 1048576 + } + //出错限制 + "errorLimit": { + //出错的record条数上限,当大于该值即报错。 + "record": 0, + //出错的record百分比上限 1.0表示100%,0.02表示2% + "percentage": 0.02 + } + }, + "content": [ + { + "reader": { + "name": "oceanbasev10reader", + "parameter": { + "where": "", + "timeout": 5, + "readBatchSize": 50000, + "column": [ + "id","name" + ], + "connection": [ + { + "jdbcUrl": ["||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/dbName"], + "table": [ + "table" + ] + } + ] + } + }, + "writer": { + //writer类型 + "name": "streamwriter", + //是否打印内容 + "parameter": { + "print":true, + } + } + } + ] + } +} +``` +``` +{ + "job": { + "setting": { + "speed": { + "channel": 3 + }, + "errorLimit": { + "record": 0 + } + }, + "content": [ + { + "reader": { + "name": "oceanbasev10reader", + "parameter": { + "where": "", + "timeout": 5, + "fetchSize": 500, + "column": [ + "id", + "name" + ], + "splitPk": "pk", + "connection": [ + { + "jdbcUrl": ["||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/dbName"], + "table": [ + "table" + ] + } + ], + "username":"xxx", + "password":"xxx" + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "print": true + } + } + } + ] + } +} +``` + +- 配置一个自定义SQL的数据库同步任务到本地内容的作业: +``` +{ + "job": { + "setting": { + "channel": 3 + }, + "content": [ + { + "reader": { + "name": "oceanbasev10reader", + "parameter": { + "timeout": 5, + "fetchSize": 500, + "splitPk": "pk", + "connection": [ + { + "jdbcUrl": ["||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/dbName"], + "querySql": [ + "select db_id,on_line_flag from db_info where db_id < 10;" + ] + } + ], + "username":"xxx", + "password":"xxx" + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "print": false, + "encoding": "UTF-8" + } + } + } + ] + } +} +``` +### 3.2 参数说明 + +- **jdbcUrl** + - 描述:连接ob使用的jdbc url,支持两种格式: + - ||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/db + - 
此格式下username仅填写用户名本身,无需三段式写法 + - jdbc:mysql://ip:port/db + - 此格式下username需要三段式写法 + - 必选:是 + - 默认值:无 +- **table** + - 描述:所选取的需要同步的表。使用JSON的数组描述,因此支持多张表同时抽取。当配置为多张表时,用户自己需保证多张表是同一schema结构,OceanbaseReader不予检查表是否同一逻辑表。注意,table必须包含在connection配置单元中。 + - 必选:是 + - 默认值:无 +- **column** + - 描述:所配置的表中需要同步的列名集合,使用JSON的数组描述字段信息。 +- 支持列裁剪,即列可以挑选部分列进行导出。 +``` +支持列换序,即列可以不按照表schema信息进行导出,同时支持通配符*,在使用之前需仔细核对列信息。 +``` + +- 必选:是 +- 默认值:无 +- **where** + - 描述:筛选条件,OceanbaseReader根据指定的column、table、where条件拼接SQL,并根据这个SQL进行数据抽取。在实际业务场景中,往往会选择当天的数据进行同步,可以将where条件指定为gmt_create > $bizdate 。这里gmt_create不可以是索引字段,也不可以是联合索引的第一个字段。where条件可以有效地进行业务增量同步。如果不填写where语句,包括不提供where的key或者value,DataX均视作同步全量数据 + - 必选:否 + - 默认值:无 +- **splitPk** + - 描述:OBReader进行数据抽取时,如果指定splitPk,表示用户希望使用splitPk代表的字段进行数据分片,DataX因此会启动并发任务进行数据同步,这样可以大大提高数据同步的效能。 + - 推荐splitPk用户使用表主键,因为表主键通常情况下比较均匀,因此切分出来的分片也不容易出现数据热点。 + - 目前splitPk仅支持int数据切分,`不支持其他类型`。如果用户指定其他非支持类型将报错。splitPk如果不填写,将视作用户不对单表进行切分,OBReader使用单通道同步全量数据。 + - 必选:否 + - 默认值:空 +- **querySql** + - 描述:在有些业务场景下,where这一配置项不足以描述所筛选的条件,用户可以通过该配置项来自定义筛选SQL。当用户配置了这一项之后,DataX系统就会忽略table,column这些配置项,直接使用这个配置项的内容对数据进行筛选 +- `当用户配置querySql时,OceanbaseReader直接忽略table、column、where条件的配置`,querySql优先级大于table、column、where选项。 + - 必选:否 + - 默认值:无 +- **timeout** + - 描述:sql执行的超时时间 单位分钟 + - 必选:否 + - 默认值:5 +- **username** + - 描述:访问oceanbase的用户名 + - 必选:是 + - 默认值:无 +- **password** + - 描述:访问oceanbase的密码 + - 必选:是 + - 默认值:无 +- **readByPartition** + - 描述:对分区表是否按照分区切分任务 + - 必选:否 + - 默认值:false +- **readBatchSize** + - 描述:一次读取的行数,如果遇到内存不足的情况,可将该值调小 + - 必选:否 + - 默认值:10000 +### 3.3 类 +### 3.3 类型转换 +下面列出OceanbaseReader针对Oceanbase类型转换列表: + +| DataX 内部类型 | Oceanbase 数据类型 | +| --- | --- | +| Long | int | +| Double | numeric | +| String | varchar | +| Date | timestamp | +| Boolean | bool | + +## 4性能测试 +### 4.1 测试报告 +影响速度的主要原因在于channel数量,channel值受限于分表的数量或者单个表的数据分片数量单表导出时查看分片数量的办法,idb执行`select/*+query_timeout(150000000)*/ s.tablet_count from __all_table t,__table_stat s where t.table_id = s.table_id and 
t.table_name = '表名'` + +| 通道数 | DataX速度(Rec/s) | DataX流量(MB/s) | +| --- | --- | --- | +| 1 | 15001 | 4.7 | +| 2 | 28169 | 11.66 | +| 3 | 37076 | 14.77 | +| 4 | 55862 | 17.60 | +| 5 | 70860 | 22.31 | + +## 5常见问题 +### +5.1 oracle模式下报错Invalid fetch size +``` +Caused by: java.sql.SQLSyntaxErrorException: (conn=2498) invalid fetch size. in Oracle mode, extendOracleResultSetClass is ineffective if useOraclePrepareExecute is set to true or usePieceData is set to true + at com.oceanbase.jdbc.internal.util.exceptions.ExceptionFactory.createException(ExceptionFactory.java:110) + at com.oceanbase.jdbc.internal.util.exceptions.ExceptionFactory.create(ExceptionFactory.java:211) + at com.oceanbase.jdbc.OceanBaseStatement.setFetchSize(OceanBaseStatement.java:1599) + at com.alibaba.datax.plugin.reader.oceanbasev10reader.ext.ReaderTask.doRead(ReaderTask.java:270) + ... 5 more +``` +该错误常发生于更换了高版本的oceanbase-client.jar驱动,高版本的驱动为了提高效率,增加了oracle预处理语句行为。这个机制和setFetchSize冲突。 +#### 解决方案 +在jdbcUrl中配置extendOracleResultSetClass=true可解决这个冲突。 \ No newline at end of file diff --git a/oceanbasev10reader/pom.xml b/oceanbasev10reader/pom.xml index 49477241d0..66965320fd 100644 --- a/oceanbasev10reader/pom.xml +++ b/oceanbasev10reader/pom.xml @@ -39,7 +39,7 @@ mysql mysql-connector-java - 5.1.40 + 8.0.28 log4j diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderTask.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderTask.java index a43dcebdf4..092710f28c 100644 --- a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderTask.java +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderTask.java @@ -1,7 +1,14 @@ package com.alibaba.datax.plugin.reader.oceanbasev10reader.ext; +import com.alibaba.datax.common.element.BoolColumn; +import com.alibaba.datax.common.element.BytesColumn; import 
com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.DateColumn; +import com.alibaba.datax.common.element.DoubleColumn; +import com.alibaba.datax.common.element.LongColumn; import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.element.StringColumn; +import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.common.plugin.TaskPluginCollector; import com.alibaba.datax.common.statistics.PerfRecord; @@ -11,6 +18,7 @@ import com.alibaba.datax.plugin.rdbms.reader.Constant; import com.alibaba.datax.plugin.rdbms.reader.Key; import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; import com.alibaba.datax.plugin.rdbms.util.RdbmsException; import com.alibaba.datax.plugin.reader.oceanbasev10reader.Config; import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils; @@ -36,6 +44,7 @@ public class ReaderTask extends CommonRdbmsReader.Task { private int readBatchSize; private int retryLimit = 0; private String compatibleMode = ObReaderUtils.OB_COMPATIBLE_MODE_MYSQL; + private static final boolean IS_DEBUG = LOG.isDebugEnabled(); private boolean reuseConn = false; public ReaderTask(int taskGroupId, int taskId) { @@ -294,4 +303,119 @@ private boolean doRead(RecordSender recordSender, TaskPluginCollector taskPlugin } } } + + //重写方法支持array类型 + protected Record buildRecord(RecordSender recordSender,ResultSet rs, ResultSetMetaData metaData, int columnNumber, String mandatoryEncoding, + TaskPluginCollector taskPluginCollector) { + Record record = recordSender.createRecord(); + + try { + for (int i = 1; i <= columnNumber; i++) { + switch (metaData.getColumnType(i)) { + + case Types.CHAR: + case Types.NCHAR: + case Types.VARCHAR: + case Types.LONGVARCHAR: + case Types.NVARCHAR: + case Types.LONGNVARCHAR: + String rawData; + if(StringUtils.isBlank(mandatoryEncoding)){ + rawData = 
rs.getString(i); + }else{ + rawData = new String((rs.getBytes(i) == null ? EMPTY_CHAR_ARRAY : + rs.getBytes(i)), mandatoryEncoding); + } + record.addColumn(new StringColumn(rawData)); + break; + + case Types.CLOB: + case Types.NCLOB: + record.addColumn(new StringColumn(rs.getString(i))); + break; + + case Types.SMALLINT: + case Types.TINYINT: + case Types.INTEGER: + case Types.BIGINT: + record.addColumn(new LongColumn(rs.getString(i))); + break; + + case Types.NUMERIC: + case Types.DECIMAL: + case Types.FLOAT: + case Types.REAL: + case Types.DOUBLE: + record.addColumn(new DoubleColumn(rs.getString(i))); + break; + + case Types.TIME: + record.addColumn(new DateColumn(rs.getTime(i))); + break; + + // for mysql bug, see http://bugs.mysql.com/bug.php?id=35115 + case Types.DATE: + if (metaData.getColumnTypeName(i).equalsIgnoreCase("year")) { + record.addColumn(new LongColumn(rs.getInt(i))); + } else { + record.addColumn(new DateColumn(rs.getDate(i))); + } + break; + + case Types.TIMESTAMP: + record.addColumn(new DateColumn(rs.getTimestamp(i))); + break; + + case Types.VARBINARY: + case Types.BLOB: + case Types.LONGVARBINARY: + record.addColumn(new BytesColumn(rs.getBytes(i))); + break; + case Types.BINARY: + String isArray = rs.getString(i); + if (isArray.startsWith("[")&& isArray.endsWith("]")){ + record.addColumn(new StringColumn(rs.getString(i))); + }else { + record.addColumn(new BytesColumn(rs.getBytes(i))); + } + break; + // warn: bit(1) -> Types.BIT 可使用BoolColumn + // warn: bit(>1) -> Types.VARBINARY 可使用BytesColumn + case Types.BOOLEAN: + case Types.BIT: + record.addColumn(new BoolColumn(rs.getBoolean(i))); + break; + + case Types.NULL: + String stringData = null; + if(rs.getObject(i) != null) { + stringData = rs.getObject(i).toString(); + } + record.addColumn(new StringColumn(stringData)); + break; + + default: + throw DataXException + .asDataXException( + DBUtilErrorCode.UNSUPPORTED_TYPE, + String.format( + "您的配置文件中的列配置信息有误. 因为DataX 不支持数据库读取这种字段类型. 
字段名:[%s], 字段名称:[%s], 字段Java类型:[%s]. 请尝试使用数据库函数将其转换datax支持的类型 或者不同步该字段 .", + metaData.getColumnName(i), + metaData.getColumnType(i), + metaData.getColumnClassName(i))); + } + } + } catch (Exception e) { + if (IS_DEBUG) { + LOG.debug("read data " + record.toString() + + " occur exception:", e); + } + //TODO 这里识别为脏数据靠谱吗? + taskPluginCollector.collectDirtyRecord(record, e); + if (e instanceof DataXException) { + throw (DataXException) e; + } + } + return record; + } } diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ExecutorTemplate.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ExecutorTemplate.java new file mode 100644 index 0000000000..a027ed7f35 --- /dev/null +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ExecutorTemplate.java @@ -0,0 +1,287 @@ +package com.alibaba.datax.plugin.reader.oceanbasev10reader.util; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +public class ExecutorTemplate { + + /** + * The default thread pool size. Set as the number of available processors by default. + */ + public static int DEFAULT_POOL_SIZE = Runtime.getRuntime().availableProcessors(); + + /** + * Indicate whether the executor closes automatically. 
+ */ + private final boolean autoClose; + + /** + * + */ + private final List> futures; + + /** + * + */ + private final ExecutorService internalExecutor; + + private final ExecutorCompletionService completionService; + + /** + * Set pool size for ExecutorTemplate. + */ + public static void setPoolSize(int size) { + DEFAULT_POOL_SIZE = size; + } + + /** + * Default: 1024 AutoClose: true + * + * @param poolName + */ + public ExecutorTemplate(String poolName) { + this(defaultExecutor(poolName), true); + } + + /** + * Default: 1024 AutoClose: true + * + * @param poolName + */ + public ExecutorTemplate(String poolName, int poolSize) { + this(defaultExecutor(poolName, poolSize), true); + } + + public ExecutorTemplate(String poolName, int poolSize, boolean autoClose) { + this(defaultExecutor(poolName, poolSize), autoClose); + } + + /** + * Default: 1024 + * + * @param poolName + * @param autoClose + */ + public ExecutorTemplate(String poolName, boolean autoClose) { + this(defaultExecutor(poolName), autoClose); + } + + /** + * Default: 1024 AutoClose: true + * + * @param executor + */ + public ExecutorTemplate(ExecutorService executor) { + this(executor, true); + } + + /** + * @param executor + */ + public ExecutorTemplate(ExecutorService executor, boolean autoClose) { + this.autoClose = autoClose; + this.internalExecutor = executor; + this.completionService = new ExecutorCompletionService<>(executor); + this.futures = Collections.synchronizedList(new ArrayList<>()); + } + + /** + * @param poolName + * @return ExecutorService + */ + public static ExecutorService defaultExecutor(String poolName) { + return defaultExecutor(100000, poolName, DEFAULT_POOL_SIZE); + } + + /** + * @param poolName + * @param poolSize + * @return ExecutorService + */ + public static ExecutorService defaultExecutor(String poolName, int poolSize) { + return defaultExecutor(100000, poolName, poolSize); + } + + /** + * @param capacity + * @param poolName + * @return ExecutorService + */ + public 
static ExecutorService defaultExecutor(int capacity, String poolName, int poolSize) { + return new ThreadPoolExecutor(poolSize, poolSize, 30, TimeUnit.SECONDS, /* */ + new ArrayBlockingQueue<>(capacity), new NamedThreadFactory(poolName)); + } + + /** + * Submit a callable task + * + * @param task + */ + public void submit(Callable task) { + Future f = this.completionService.submit(task); + futures.add(f); + check(f); + } + + /** + * Submit a runnable task + * + * @param task + */ + public void submit(Runnable task) { + Future f = this.completionService.submit(task, null); + futures.add(f); + check(f); + } + + /** + * Wait all the task run finished, and get all the results. + * + * @return List + */ + public List waitForResult() { + try { + int index = 0; + Throwable ex = null; + List result = new ArrayList(); + while (index < futures.size()) { + try { + Future f = this.completionService.take(); + result.add(f.get()); + } catch (Throwable e) { + ex = getRootCause(e); + break; + } + index++; + } + if (ex != null) { + cancelAll(); + throw new RuntimeException(ex); + } else { + return result; + } + } finally { + clearFutures(); + if (autoClose) { + destroyExecutor(); + } + } + } + + /** + * + */ + public void cancelAll() { + for (Future f : futures) { + if (!f.isDone() && !f.isCancelled()) { + f.cancel(false); + } + } + } + + /** + * + */ + public void clearFutures() { + this.futures.clear(); + } + + /** + * + */ + public void destroyExecutor() { + if (internalExecutor != null && !internalExecutor.isShutdown()) { + this.internalExecutor.shutdown(); + try { + this.internalExecutor.awaitTermination(0, TimeUnit.SECONDS); + } catch (InterruptedException e) { + } + } + } + + /** + * Fast check the future + * + * @param f + */ + private void check(Future f) { + if (f != null && f.isDone()) { + try { + f.get(); + } catch (Throwable e) { + cancelAll(); + throw new RuntimeException(e); + } + } + } + + /** + * @param throwable + * @return Throwable + */ + private Throwable 
getRootCause(Throwable throwable) { + final Throwable holder = throwable; + final List list = new ArrayList<>(); + while (throwable != null && !list.contains(throwable)) { + list.add(throwable); + throwable = throwable.getCause(); + } + return list.size() < 2 ? holder : list.get(list.size() - 1); + } + + /** + * An internal named thread factory + */ + static class NamedThreadFactory implements ThreadFactory { + + /** + * + */ + private final boolean daemon; + + /** + * + */ + private final String name; + + /** + * + */ + private final AtomicInteger seq = new AtomicInteger(0); + + /** + * @param name + */ + public NamedThreadFactory(String name) { + this(name, false); + } + + /** + * @param name + * @param daemon + */ + public NamedThreadFactory(String name, boolean daemon) { + this.name = name; + this.daemon = daemon; + } + + @Override + public Thread newThread(Runnable r) { + Thread t = new Thread(r); + t.setDaemon(daemon); + t.setPriority(Thread.NORM_PRIORITY); + t.setName((name + seq.incrementAndGet())); + return t; + } + } +} \ No newline at end of file diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtils.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtils.java index cca2f66c90..9aa06644f3 100644 --- a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtils.java +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtils.java @@ -1,6 +1,7 @@ package com.alibaba.datax.plugin.reader.oceanbasev10reader.util; import com.alibaba.datax.common.element.*; +import com.alibaba.datax.common.element.Record; import com.alibaba.datax.plugin.rdbms.reader.util.ObVersion; import com.alibaba.datax.plugin.rdbms.reader.util.SingleTableSplitUtil; import com.alibaba.datax.plugin.rdbms.util.DBUtil; @@ -10,6 +11,7 @@ import com.alibaba.druid.sql.ast.SQLExpr; import 
com.alibaba.druid.sql.ast.expr.SQLBinaryOpExpr; import com.alibaba.druid.sql.ast.expr.SQLBinaryOperator; + import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -26,7 +28,13 @@ */ public class ObReaderUtils { private static final Logger LOG = LoggerFactory.getLogger(ObReaderUtils.class); - private static final String MYSQL_KEYWORDS = "ACCESSIBLE,ACCOUNT,ACTION,ADD,AFTER,AGAINST,AGGREGATE,ALGORITHM,ALL,ALTER,ALWAYS,ANALYSE,AND,ANY,AS,ASC,ASCII,ASENSITIVE,AT,AUTO_INCREMENT,AUTOEXTEND_SIZE,AVG,AVG_ROW_LENGTH,BACKUP,BEFORE,BEGIN,BETWEEN,BIGINT,BINARY,BINLOG,BIT,BLOB,BLOCK,BOOL,BOOLEAN,BOTH,BTREE,BY,BYTE,CACHE,CALL,CASCADE,CASCADED,CASE,CATALOG_NAME,CHAIN,CHANGE,CHANGED,CHANNEL,CHAR,CHARACTER,CHARSET,CHECK,CHECKSUM,CIPHER,CLASS_ORIGIN,CLIENT,CLOSE,COALESCE,CODE,COLLATE,COLLATION,COLUMN,COLUMN_FORMAT,COLUMN_NAME,COLUMNS,COMMENT,COMMIT,COMMITTED,COMPACT,COMPLETION,COMPRESSED,COMPRESSION,CONCURRENT,CONDITION,CONNECTION,CONSISTENT,CONSTRAINT,CONSTRAINT_CATALOG,CONSTRAINT_NAME,CONSTRAINT_SCHEMA,CONTAINS,CONTEXT,CONTINUE,CONVERT,CPU,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,CURSOR_NAME,DATA,DATABASE,DATABASES,DATAFILE,DATE,DATETIME,DAY,DAY_HOUR,DAY_MICROSECOND,DAY_MINUTE,DAY_SECOND,DEALLOCATE,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_AUTH,DEFINER,DELAY_KEY_WRITE,DELAYED,DELETE,DES_KEY_FILE,DESC,DESCRIBE,DETERMINISTIC,DIAGNOSTICS,DIRECTORY,DISABLE,DISCARD,DISK,DISTINCT,DISTINCTROW,DIV,DO,DOUBLE,DROP,DUAL,DUMPFILE,DUPLICATE,DYNAMIC,EACH,ELSE,ELSEIF,ENABLE,ENCLOSED,ENCRYPTION,END,ENDS,ENGINE,ENGINES,ENUM,ERROR,ERRORS,ESCAPE,ESCAPED,EVENT,EVENTS,EVERY,EXCHANGE,EXECUTE,EXISTS,EXIT,EXPANSION,EXPIRE,EXPLAIN,EXPORT,EXTENDED,EXTENT_SIZE,FAST,FAULTS,FETCH,FIELDS,FILE,FILE_BLOCK_SIZE,FILTER,FIRST,FIXED,FLOAT,FLOAT4,FLOAT8,FLUSH,FOLLOWS,FOR,FORCE,FOREIGN,FORMAT,FOUND,FROM,FULL,FULLTEXT,FUNCTION,GENERAL,GENERATED,GEOMETRY,GEOMETRYCOLLECTION,GET,GET_FORMAT,GLOBAL,GRANT,GRANTS,GROUP,GROUP
_REPLICATION,HANDLER,HASH,HAVING,HELP,HIGH_PRIORITY,HOST,HOSTS,HOUR,HOUR_MICROSECOND,HOUR_MINUTE,HOUR_SECOND,IDENTIFIED,IF,IGNORE,IGNORE_SERVER_IDS,IMPORT,IN,INDEX,INDEXES,INFILE,INITIAL_SIZE,INNER,INOUT,INSENSITIVE,INSERT,INSERT_METHOD,INSTALL,INSTANCE,INT,INT1,INT2,INT3,INT4,INT8,INTEGER,INTERVAL,INTO,INVOKER,IO,IO_AFTER_GTIDS,IO_BEFORE_GTIDS,IO_THREAD,IPC,IS,ISOLATION,ISSUER,ITERATE,JOIN,JSON,KEY,KEY_BLOCK_SIZE,KEYS,KILL,LANGUAGE,LAST,LEADING,LEAVE,LEAVES,LEFT,LESS,LEVEL,LIKE,LIMIT,LINEAR,LINES,LINESTRING,LIST,LOAD,LOCAL,LOCALTIME,LOCALTIMESTAMP,LOCK,LOCKS,LOGFILE,LOGS,LONG,LONGBLOB,LONGTEXT,LOOP,LOW_PRIORITY,MASTER,MASTER_AUTO_POSITION,MASTER_BIND,MASTER_CONNECT_RETRY,MASTER_DELAY,MASTER_HEARTBEAT_PERIOD,MASTER_HOST,MASTER_LOG_FILE,MASTER_LOG_POS,MASTER_PASSWORD,MASTER_PORT,MASTER_RETRY_COUNT,MASTER_SERVER_ID,MASTER_SSL,MASTER_SSL_CA,MASTER_SSL_CAPATH,MASTER_SSL_CERT,MASTER_SSL_CIPHER,MASTER_SSL_CRL,MASTER_SSL_CRLPATH,MASTER_SSL_KEY,MASTER_SSL_VERIFY_SERVER_CERT,MASTER_TLS_VERSION,MASTER_USER,MATCH,MAX_CONNECTIONS_PER_HOUR,MAX_QUERIES_PER_HOUR,MAX_ROWS,MAX_SIZE,MAX_STATEMENT_TIME,MAX_UPDATES_PER_HOUR,MAX_USER_CONNECTIONS,MAXVALUE,MEDIUM,MEDIUMBLOB,MEDIUMINT,MEDIUMTEXT,MEMORY,MERGE,MESSAGE_TEXT,MICROSECOND,MIDDLEINT,MIGRATE,MIN_ROWS,MINUTE,MINUTE_MICROSECOND,MINUTE_SECOND,MOD,MODE,MODIFIES,MODIFY,MONTH,MULTILINESTRING,MULTIPOINT,MULTIPOLYGON,MUTEX,MYSQL_ERRNO,NAME,NAMES,NATIONAL,NATURAL,NCHAR,NDB,NDBCLUSTER,NEVER,NEW,NEXT,NO,NO_WAIT,NO_WRITE_TO_BINLOG,NODEGROUP,NONBLOCKING,NONE,NOT,NULL,NUMBER,NUMERIC,NVARCHAR,OFFSET,OLD_PASSWORD,ON,ONE,ONLY,OPEN,OPTIMIZE,OPTIMIZER_COSTS,OPTION,OPTIONALLY,OPTIONS,OR,ORDER,OUT,OUTER,OUTFILE,OWNER,PACK_KEYS,PAGE,PARSE_GCOL_EXPR,PARSER,PARTIAL,PARTITION,PARTITIONING,PARTITIONS,PASSWORD,PHASE,PLUGIN,PLUGIN_DIR,PLUGINS,POINT,POLYGON,PORT,PRECEDES,PRECISION,PREPARE,PRESERVE,PREV,PRIMARY,PRIVILEGES,PROCEDURE,PROCESSLIST,PROFILE,PROFILES,PROXY,PURGE,QUARTER,QUERY,QUICK,RANGE,READ,READ_ONLY,READ_WRITE,READS,REAL,REBUILD,RECOVER,REDO_BUFFE
R_SIZE,REDOFILE,REDUNDANT,REFERENCES,REGEXP,RELAY,RELAY_LOG_FILE,RELAY_LOG_POS,RELAY_THREAD,RELAYLOG,RELEASE,RELOAD,REMOVE,RENAME,REORGANIZE,REPAIR,REPEAT,REPEATABLE,REPLACE,REPLICATE_DO_DB,REPLICATE_DO_TABLE,REPLICATE_IGNORE_DB,REPLICATE_IGNORE_TABLE,REPLICATE_REWRITE_DB,REPLICATE_WILD_DO_TABLE,REPLICATE_WILD_IGNORE_TABLE,REPLICATION,REQUIRE,RESET,RESIGNAL,RESTORE,RESTRICT,RESUME,RETURN,RETURNED_SQLSTATE,RETURNS,REVERSE,REVOKE,RIGHT,RLIKE,ROLLBACK,ROLLUP,ROTATE,ROUTINE,ROW,ROW_COUNT,ROW_FORMAT,ROWS,RTREE,SAVEPOINT,SCHEDULE,SCHEMA,SCHEMA_NAME,SCHEMAS,SECOND,SECOND_MICROSECOND,SECURITY,SELECT,SENSITIVE,SEPARATOR,SERIAL,SERIALIZABLE,SERVER,SESSION,SET,SHARE,SHOW,SHUTDOWN,SIGNAL,SIGNED,SIMPLE,SLAVE,SLOW,SMALLINT,SNAPSHOT,SOCKET,SOME,SONAME,SOUNDS,SOURCE,SPATIAL,SPECIFIC,SQL,SQL_AFTER_GTIDS,SQL_AFTER_MTS_GAPS,SQL_BEFORE_GTIDS,SQL_BIG_RESULT,SQL_BUFFER_RESULT,SQL_CACHE,SQL_CALC_FOUND_ROWS,SQL_NO_CACHE,SQL_SMALL_RESULT,SQL_THREAD,SQL_TSI_DAY,SQL_TSI_HOUR,SQL_TSI_MINUTE,SQL_TSI_MONTH,SQL_TSI_QUARTER,SQL_TSI_SECOND,SQL_TSI_WEEK,SQL_TSI_YEAR,SQLEXCEPTION,SQLSTATE,SQLWARNING,SSL,STACKED,START,STARTING,STARTS,STATS_AUTO_RECALC,STATS_PERSISTENT,STATS_SAMPLE_PAGES,STATUS,STOP,STORAGE,STORED,STRAIGHT_JOIN,STRING,SUBCLASS_ORIGIN,SUBJECT,SUBPARTITION,SUBPARTITIONS,SUPER,SUSPEND,SWAPS,SWITCHES,TABLE,TABLE_CHECKSUM,TABLE_NAME,TABLES,TABLESPACE,TEMPORARY,TEMPTABLE,TERMINATED,TEXT,THAN,THEN,TIME,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TINYBLOB,TINYINT,TINYTEXT,TO,TRAILING,TRANSACTION,TRIGGER,TRIGGERS,TRUNCATE,TYPE,TYPES,UNCOMMITTED,UNDEFINED,UNDO,UNDO_BUFFER_SIZE,UNDOFILE,UNICODE,UNINSTALL,UNION,UNIQUE,UNKNOWN,UNLOCK,UNSIGNED,UNTIL,UPDATE,UPGRADE,USAGE,USE,USE_FRM,USER,USER_RESOURCES,USING,UTC_DATE,UTC_TIME,UTC_TIMESTAMP,VALIDATION,VALUE,VALUES,VARBINARY,VARCHAR,VARCHARACTER,VARIABLES,VARYING,VIEW,VIRTUAL,WAIT,WARNINGS,WEEK,WEIGHT_STRING,WHEN,WHERE,WHILE,WITH,WITHOUT,WORK,WRAPPER,WRITE,X509,XA,XID,XML,XOR,YEAR,YEAR_MONTH,ZEROFILL,FALSE,TRUE"; + private static final String MYSQL_KEYWORDS 
+ = "ACCESSIBLE,ACCOUNT,ACTION,ADD,AFTER,AGAINST,AGGREGATE,ALGORITHM,ALL,ALTER,ALWAYS,ANALYSE,AND,ANY,AS,ASC,ASCII,ASENSITIVE,AT,AUTO_INCREMENT,AUTOEXTEND_SIZE,AVG,AVG_ROW_LENGTH,BACKUP,BEFORE,BEGIN,BETWEEN,BIGINT,BINARY,BINLOG,BIT,BLOB,BLOCK,BOOL,BOOLEAN,BOTH,BTREE,BY,BYTE,CACHE,CALL,CASCADE,CASCADED,CASE,CATALOG_NAME,CHAIN,CHANGE,CHANGED,CHANNEL,CHAR,CHARACTER,CHARSET,CHECK,CHECKSUM,CIPHER,CLASS_ORIGIN,CLIENT,CLOSE,COALESCE,CODE,COLLATE,COLLATION,COLUMN,COLUMN_FORMAT,COLUMN_NAME,COLUMNS,COMMENT,COMMIT,COMMITTED,COMPACT,COMPLETION,COMPRESSED,COMPRESSION,CONCURRENT,CONDITION,CONNECTION,CONSISTENT,CONSTRAINT,CONSTRAINT_CATALOG,CONSTRAINT_NAME,CONSTRAINT_SCHEMA,CONTAINS,CONTEXT,CONTINUE,CONVERT,CPU,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,CURSOR_NAME,DATA,DATABASE,DATABASES,DATAFILE,DATE,DATETIME,DAY,DAY_HOUR,DAY_MICROSECOND,DAY_MINUTE,DAY_SECOND,DEALLOCATE,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_AUTH,DEFINER,DELAY_KEY_WRITE," + + "DELAYED,DELETE,DES_KEY_FILE,DESC,DESCRIBE,DETERMINISTIC,DIAGNOSTICS,DIRECTORY,DISABLE,DISCARD,DISK,DISTINCT,DISTINCTROW,DIV,DO,DOUBLE,DROP,DUAL,DUMPFILE,DUPLICATE,DYNAMIC,EACH,ELSE,ELSEIF,ENABLE,ENCLOSED,ENCRYPTION,END,ENDS,ENGINE,ENGINES,ENUM,ERROR,ERRORS,ESCAPE,ESCAPED,EVENT,EVENTS,EVERY,EXCHANGE,EXECUTE,EXISTS,EXIT,EXPANSION,EXPIRE,EXPLAIN,EXPORT,EXTENDED,EXTENT_SIZE,FAST,FAULTS,FETCH,FIELDS,FILE,FILE_BLOCK_SIZE,FILTER,FIRST,FIXED,FLOAT,FLOAT4,FLOAT8,FLUSH,FOLLOWS,FOR,FORCE,FOREIGN,FORMAT,FOUND,FROM,FULL,FULLTEXT,FUNCTION,GENERAL,GENERATED,GEOMETRY,GEOMETRYCOLLECTION,GET,GET_FORMAT,GLOBAL,GRANT,GRANTS,GROUP,GROUP_REPLICATION,HANDLER,HASH,HAVING,HELP,HIGH_PRIORITY,HOST,HOSTS,HOUR,HOUR_MICROSECOND,HOUR_MINUTE,HOUR_SECOND,IDENTIFIED,IF,IGNORE,IGNORE_SERVER_IDS,IMPORT,IN,INDEX,INDEXES,INFILE,INITIAL_SIZE,INNER,INOUT,INSENSITIVE,INSERT,INSERT_METHOD,INSTALL,INSTANCE,INT,INT1,INT2,INT3,INT4,INT8,INTEGER,INTERVAL,INTO,INVOKER,IO,IO_AFTER_GTIDS,IO_BEFORE_GTIDS,IO_THREAD," + + 
"IPC,IS,ISOLATION,ISSUER,ITERATE,JOIN,JSON,KEY,KEY_BLOCK_SIZE,KEYS,KILL,LANGUAGE,LAST,LEADING,LEAVE,LEAVES,LEFT,LESS,LEVEL,LIKE,LIMIT,LINEAR,LINES,LINESTRING,LIST,LOAD,LOCAL,LOCALTIME,LOCALTIMESTAMP,LOCK,LOCKS,LOGFILE,LOGS,LONG,LONGBLOB,LONGTEXT,LOOP,LOW_PRIORITY,MASTER,MASTER_AUTO_POSITION,MASTER_BIND,MASTER_CONNECT_RETRY,MASTER_DELAY,MASTER_HEARTBEAT_PERIOD,MASTER_HOST,MASTER_LOG_FILE,MASTER_LOG_POS,MASTER_PASSWORD,MASTER_PORT,MASTER_RETRY_COUNT,MASTER_SERVER_ID,MASTER_SSL,MASTER_SSL_CA,MASTER_SSL_CAPATH,MASTER_SSL_CERT,MASTER_SSL_CIPHER,MASTER_SSL_CRL,MASTER_SSL_CRLPATH,MASTER_SSL_KEY,MASTER_SSL_VERIFY_SERVER_CERT,MASTER_TLS_VERSION,MASTER_USER,MATCH,MAX_CONNECTIONS_PER_HOUR,MAX_QUERIES_PER_HOUR,MAX_ROWS,MAX_SIZE,MAX_STATEMENT_TIME,MAX_UPDATES_PER_HOUR,MAX_USER_CONNECTIONS,MAXVALUE,MEDIUM,MEDIUMBLOB,MEDIUMINT,MEDIUMTEXT,MEMORY,MERGE,MESSAGE_TEXT,MICROSECOND,MIDDLEINT,MIGRATE,MIN_ROWS,MINUTE,MINUTE_MICROSECOND,MINUTE_SECOND,MOD,MODE,MODIFIES,MODIFY,MONTH," + + "MULTILINESTRING,MULTIPOINT,MULTIPOLYGON,MUTEX,MYSQL_ERRNO,NAME,NAMES,NATIONAL,NATURAL,NCHAR,NDB,NDBCLUSTER,NEVER,NEW,NEXT,NO,NO_WAIT,NO_WRITE_TO_BINLOG,NODEGROUP,NONBLOCKING,NONE,NOT,NULL,NUMBER,NUMERIC,NVARCHAR,OFFSET,OLD_PASSWORD,ON,ONE,ONLY,OPEN,OPTIMIZE,OPTIMIZER_COSTS,OPTION,OPTIONALLY,OPTIONS,OR,ORDER,OUT,OUTER,OUTFILE,OWNER,PACK_KEYS,PAGE,PARSE_GCOL_EXPR,PARSER,PARTIAL,PARTITION,PARTITIONING,PARTITIONS,PASSWORD,PHASE,PLUGIN,PLUGIN_DIR,PLUGINS,POINT,POLYGON,PORT,PRECEDES,PRECISION,PREPARE,PRESERVE,PREV,PRIMARY,PRIVILEGES,PROCEDURE,PROCESSLIST,PROFILE,PROFILES,PROXY,PURGE,QUARTER,QUERY,QUICK,RANGE,READ,READ_ONLY,READ_WRITE,READS,REAL,REBUILD,RECOVER,REDO_BUFFER_SIZE,REDOFILE,REDUNDANT,REFERENCES,REGEXP,RELAY,RELAY_LOG_FILE,RELAY_LOG_POS,RELAY_THREAD,RELAYLOG,RELEASE,RELOAD,REMOVE,RENAME,REORGANIZE,REPAIR,REPEAT,REPEATABLE,REPLACE,REPLICATE_DO_DB,REPLICATE_DO_TABLE,REPLICATE_IGNORE_DB,REPLICATE_IGNORE_TABLE,REPLICATE_REWRITE_DB,REPLICATE_WILD_DO_TABLE," + + 
"REPLICATE_WILD_IGNORE_TABLE,REPLICATION,REQUIRE,RESET,RESIGNAL,RESTORE,RESTRICT,RESUME,RETURN,RETURNED_SQLSTATE,RETURNS,REVERSE,REVOKE,RIGHT,RLIKE,ROLLBACK,ROLLUP,ROTATE,ROUTINE,ROW,ROW_COUNT,ROW_FORMAT,ROWS,RTREE,SAVEPOINT,SCHEDULE,SCHEMA,SCHEMA_NAME,SCHEMAS,SECOND,SECOND_MICROSECOND,SECURITY,SELECT,SENSITIVE,SEPARATOR,SERIAL,SERIALIZABLE,SERVER,SESSION,SET,SHARE,SHOW,SHUTDOWN,SIGNAL,SIGNED,SIMPLE,SLAVE,SLOW,SMALLINT,SNAPSHOT,SOCKET,SOME,SONAME,SOUNDS,SOURCE,SPATIAL,SPECIFIC,SQL,SQL_AFTER_GTIDS,SQL_AFTER_MTS_GAPS,SQL_BEFORE_GTIDS,SQL_BIG_RESULT,SQL_BUFFER_RESULT,SQL_CACHE,SQL_CALC_FOUND_ROWS,SQL_NO_CACHE,SQL_SMALL_RESULT,SQL_THREAD,SQL_TSI_DAY,SQL_TSI_HOUR,SQL_TSI_MINUTE,SQL_TSI_MONTH,SQL_TSI_QUARTER,SQL_TSI_SECOND,SQL_TSI_WEEK,SQL_TSI_YEAR,SQLEXCEPTION,SQLSTATE,SQLWARNING,SSL,STACKED,START,STARTING,STARTS,STATS_AUTO_RECALC,STATS_PERSISTENT,STATS_SAMPLE_PAGES,STATUS,STOP,STORAGE,STORED,STRAIGHT_JOIN,STRING,SUBCLASS_ORIGIN,SUBJECT,SUBPARTITION,SUBPARTITIONS,SUPER," + + "SUSPEND,SWAPS,SWITCHES,TABLE,TABLE_CHECKSUM,TABLE_NAME,TABLES,TABLESPACE,TEMPORARY,TEMPTABLE,TERMINATED,TEXT,THAN,THEN,TIME,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TINYBLOB,TINYINT,TINYTEXT,TO,TRAILING,TRANSACTION,TRIGGER,TRIGGERS,TRUNCATE,TYPE,TYPES,UNCOMMITTED,UNDEFINED,UNDO,UNDO_BUFFER_SIZE,UNDOFILE,UNICODE,UNINSTALL,UNION,UNIQUE,UNKNOWN,UNLOCK,UNSIGNED,UNTIL,UPDATE,UPGRADE,USAGE,USE,USE_FRM,USER,USER_RESOURCES,USING,UTC_DATE,UTC_TIME,UTC_TIMESTAMP,VALIDATION,VALUE,VALUES,VARBINARY,VARCHAR,VARCHARACTER,VARIABLES,VARYING,VIEW,VIRTUAL,WAIT,WARNINGS,WEEK,WEIGHT_STRING,WHEN,WHERE,WHILE,WITH,WITHOUT,WORK,WRAPPER,WRITE,X509,XA,XID,XML,XOR,YEAR,YEAR_MONTH,ZEROFILL,FALSE,TRUE"; private static final String ORACLE_KEYWORDS = 
"ACCESS,ADD,ALL,ALTER,AND,ANY,ARRAYLEN,AS,ASC,AUDIT,BETWEEN,BY,CHAR,CHECK,CLUSTER,COLUMN,COMMENT,COMPRESS,CONNECT,CREATE,CURRENT,DATE,DECIMAL,DEFAULT,DELETE,DESC,DISTINCT,DROP,ELSE,EXCLUSIVE,EXISTS,FILE,FLOAT,FOR,FROM,GRANT,GROUP,HAVING,IDENTIFIED,IMMEDIATE,IN,INCREMENT,INDEX,INITIAL,INSERT,INTEGER,INTERSECT,INTO,IS,LEVEL,LIKE,LOCK,LONG,MAXEXTENTS,MINUS,MODE,MODIFY,NOAUDIT,NOCOMPRESS,NOT,NOTFOUND,NOWAIT,NUMBER,OF,OFFLINE,ON,ONLINE,OPTION,OR,ORDER,PCTFREE,PRIOR,PRIVILEGES,PUBLIC,RAW,RENAME,RESOURCE,REVOKE,ROW,ROWID,ROWLABEL,ROWNUM,ROWS,SELECT,SESSION,SET,SHARE,SIZE,SMALLINT,SQLBUF,START,SUCCESSFUL,SYNONYM,TABLE,THEN,TO,TRIGGER,UID,UNION,UNIQUE,UPDATE,USER,VALIDATE,VALUES,VARCHAR,VARCHAR2,VIEW,WHENEVER,WHERE,WITH,KEY,NAME,VALUE,TYPE"; private static Set databaseKeywords; @@ -127,7 +135,7 @@ public static void matchPkIndexs(Connection conn, TaskContext context) { // 如果用户定义的 columns中 带有 ``,也不影响, // 最多只是在select里多加了几列PK column if (StringUtils.equalsIgnoreCase(pkc, columns.get(j)) - || StringUtils.equalsIgnoreCase(escapedPkc, columns.get(j))) { + || StringUtils.equalsIgnoreCase(escapedPkc, columns.get(j))) { pkIndexs[i] = j; pkColumns[i] = columns.get(j); break; @@ -156,13 +164,13 @@ private static String[] getPkColumns(Connection conn, TaskContext context) { } //OceanBase oracle模式下需要使用position排序获取正确的联合主键顺序 sql = String.format( - "SELECT cols.column_name Column_name " + - "FROM all_constraints cons, all_cons_columns cols " + - "WHERE cols.table_name = '%s' AND cons.constraint_type = 'P' " + - "AND cons.constraint_name = cols.constraint_name " + - "AND cons.owner = cols.owner and cons.OWNER = %s " + - "order by cols.position " , - tableName, schema); + "SELECT cols.column_name Column_name " + + "FROM all_constraints cons, all_cons_columns cols " + + "WHERE cols.table_name = '%s' AND cons.constraint_type = 'P' " + + "AND cons.constraint_name = cols.constraint_name " + + "AND cons.owner = cols.owner and cons.OWNER = %s " + + "order by cols.position ", + tableName, schema); } 
LOG.info("get primary key by sql: " + sql); Statement ps = null; @@ -227,11 +235,9 @@ public static String buildFirstQuerySql(TaskContext context) { sql += (StringUtils.isNotEmpty(context.getWhere()) ? " and " : " where ") + userSavePoint; } - sql += " order by " + StringUtils.join(context.getPkColumns(), ',') + " asc"; - - // Using sub-query to apply rownum < readBatchSize since where has higher priority than order by - if (ObReaderUtils.isOracleMode(context.getCompatibleMode()) && context.getReadBatchSize() != -1) { - sql = String.format("select * from (%s) where rownum <= %d", sql, context.getReadBatchSize()); + if (context.getPkColumns() != null && context.getPkColumns().length > 0) { + // 有主键 + sql += " order by " + StringUtils.join(context.getPkColumns(), ',') + " asc"; } return sql; @@ -260,19 +266,43 @@ public static String buildAppendQuerySql(Connection conn, TaskContext context) { sql += String.format(" partition(%s) ", context.getPartitionName()); } - sql += " where "; - String append = "(" + StringUtils.join(context.getPkColumns(), ',') + ") > (" - + buildPlaceHolder(context.getPkColumns().length) + ")"; - - if (StringUtils.isNotEmpty(context.getWhere())) { - sql += "(" + context.getWhere() + ") and "; - } + String[] pkColumns = context.getPkColumns(); + StringBuilder whereClause = new StringBuilder(); - sql = String.format("%s %s order by %s asc", sql, append, StringUtils.join(context.getPkColumns(), ',')); + if (pkColumns != null && pkColumns.length > 0) { + whereClause.append(" ("); + for (int i = 0; i < pkColumns.length; i++) { + if (i == 0) { + whereClause.append(pkColumns[i]).append(" > ?"); + } else { + whereClause.append(" OR ("); + for (int j = 0; j <= i; j++) { + if (j > 0) { + whereClause.append(" AND "); + } + if (j == i) { + whereClause.append(pkColumns[j]).append(" > ? "); + } else { + whereClause.append(pkColumns[j]).append(" = ? 
"); + } + } + whereClause.append(")"); + } + } + whereClause.append(")"); - // Using sub-query to apply rownum < readBatchSize since where has higher priority than order by - if (ObReaderUtils.isOracleMode(context.getCompatibleMode()) && context.getReadBatchSize() != -1) { - sql = String.format("select * from (%s) where rownum <= %d", sql, context.getReadBatchSize()); + // 如果有额外的 WHERE 条件,则拼接进去 + if (StringUtils.isNotEmpty(context.getWhere())) { + whereClause.insert(0, "(" + context.getWhere() + ") AND "); + } + sql += " where " + whereClause; + // 添加 ORDER BY 子句 + sql += " order by " + StringUtils.join(pkColumns, ",") + " asc"; + } else { + // 无主键 + if (StringUtils.isNotEmpty(context.getWhere())) { + sql += " where " + context.getWhere(); + } } return sql; @@ -476,22 +506,22 @@ private static Map> getAllIndex(Connection conn, String tab } sql = String.format( - "SELECT INDEX_NAME Key_name, COLUMN_NAME Column_name " + - "from all_ind_columns " + - "where TABLE_NAME = '%s' and TABLE_OWNER = %s " + - " union all " + - "SELECT DISTINCT " + - "CASE " + - "WHEN cons.CONSTRAINT_TYPE = 'P' THEN 'PRIMARY' " + - "WHEN cons.CONSTRAINT_TYPE = 'U' THEN cons.CONSTRAINT_NAME " + - "ELSE '' " + - "END AS Key_name, " + - "cols.column_name Column_name " + - "FROM all_constraints cons, all_cons_columns cols " + - "WHERE cols.table_name = '%s' AND cons.constraint_type in('P', 'U') " + - "AND cons.constraint_name = cols.constraint_name AND cons.owner = cols.owner " + - "AND cons.owner = %s", - tableName, schema, tableName, schema); + "SELECT INDEX_NAME Key_name, COLUMN_NAME Column_name " + + "from all_ind_columns " + + "where TABLE_NAME = '%s' and TABLE_OWNER = %s " + + " union all " + + "SELECT DISTINCT " + + "CASE " + + "WHEN cons.CONSTRAINT_TYPE = 'P' THEN 'PRIMARY' " + + "WHEN cons.CONSTRAINT_TYPE = 'U' THEN cons.CONSTRAINT_NAME " + + "ELSE '' " + + "END AS Key_name, " + + "cols.column_name Column_name " + + "FROM all_constraints cons, all_cons_columns cols " + + "WHERE 
cols.table_name = '%s' AND cons.constraint_type in('P', 'U') " + + "AND cons.constraint_name = cols.constraint_name AND cons.owner = cols.owner " + + "AND cons.owner = %s", + tableName, schema, tableName, schema); } Statement stmt = null; @@ -530,11 +560,11 @@ private static Map> getAllIndex(Connection conn, String tab } else { // add primary key to the index if the index is not on the column colsInPrimary.forEach( - c -> { - if (!indexColumns.contains(c)) { - indexColumns.add(c); - } - }); + c -> { + if (!indexColumns.contains(c)) { + indexColumns.add(c); + } + }); } } } @@ -550,13 +580,14 @@ private static Map> getAllIndex(Connection conn, String tab /** * find out the indexes which contains all columns in where conditions + * * @param conn * @param table * @param colNamesInCondition * @return */ private static List getIndexName(Connection conn, String table, - Set colNamesInCondition, String compatibleMode) { + Set colNamesInCondition, String compatibleMode) { List indexNames = new ArrayList(); if (colNamesInCondition == null || colNamesInCondition.size() == 0) { LOG.info("there is no qulified conditions in the where clause, skip index selection."); @@ -722,8 +753,13 @@ public static String buildPlaceHolder(int n) { } public static void binding(PreparedStatement ps, List list) throws SQLException { - for (int i = 0, n = list.size(); i < n; i++) { - Column c = list.get(i); + if (list.isEmpty()) { + return; + } + List columns = buildFullParams(list); + + for (int i = 0; i < columns.size(); i++) { + Column c = columns.get(i); if (c instanceof BoolColumn) { ps.setLong(i + 1, ((BoolColumn) c).asLong()); } else if (c instanceof BytesColumn) { @@ -731,7 +767,8 @@ public static void binding(PreparedStatement ps, List list) throws SQLEx } else if (c instanceof DateColumn) { ps.setTimestamp(i + 1, new Timestamp(((DateColumn) c).asDate().getTime())); } else if (c instanceof DoubleColumn) { - ps.setDouble(i + 1, ((DoubleColumn) c).asDouble()); + 
//应该直接使用bigDecimal,asDouble会先转换成bigDecimal再转换成Double会导致精度丢失 + ps.setBigDecimal(i + 1, ((DoubleColumn) c).asBigDecimal()); } else if (c instanceof LongColumn) { ps.setLong(i + 1, ((LongColumn) c).asLong()); } else if (c instanceof StringColumn) { @@ -742,6 +779,21 @@ public static void binding(PreparedStatement ps, List list) throws SQLEx } } + //增多检查点,上游的构建行为为A,AB,ABC,ABCD的组合,占位符的数量为n(n+1)/2,n为主键列的数量 + public static List buildFullParams(List savePointColumns) { + if (savePointColumns == null || savePointColumns.isEmpty()) { + return new ArrayList<>(); + } + int n = savePointColumns.size(); + List fullParams = new ArrayList<>(); + for (int i = 0; i < n; i++) { + for (int j = 0; j <= i; j++) { + fullParams.add(savePointColumns.get(j)); + } + } + return fullParams; + } + public static List buildPoint(Record savePoint, int[] pkIndexs) { List result = new ArrayList(pkIndexs.length); for (int i = 0, n = pkIndexs.length; i < n; i++) { @@ -808,11 +860,12 @@ public static String buildWeakReadQuerySql(String column, String table, String w /** * compare two ob versions + * * @param version1 * @param version2 - * @return 0 when the two versions are the same - * -1 when version1 is smaller (earlier) than version2 - * 1 when version is bigger (later) than version2 + * @return 0 when the two versions are the same + * -1 when version1 is smaller (earlier) than version2 + * 1 when version is bigger (later) than version2 */ public static int compareObVersion(String version1, String version2) { if (version1 == null || version2 == null) { @@ -824,7 +877,6 @@ public static int compareObVersion(String version1, String version2) { } /** - * * @param conn * @param sql * @return @@ -853,6 +905,7 @@ public static List getResultsFromSql(Connection conn, String sql) { /** * get obversion, try ob_version first, and then try version if failed + * * @param conn * @return */ diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/TaskContext.java 
b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/TaskContext.java index 17655a52e4..df77848f11 100644 --- a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/TaskContext.java +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/TaskContext.java @@ -80,8 +80,10 @@ public void setSecondaryIndexColumns(List secondaryIndexColumns) { } public String getQuerySql() { - if (readBatchSize == -1 || ObReaderUtils.isOracleMode(compatibleMode)) { + if (readBatchSize == -1) { return querySql; + } else if (ObReaderUtils.isOracleMode(compatibleMode)) { + return String.format("select * from (%s) where rownum <= %d", querySql, readBatchSize); } else { return querySql + " limit " + readBatchSize; } diff --git a/oceanbasev10writer/doc/oceanbasev10writer.md b/oceanbasev10writer/doc/oceanbasev10writer.md new file mode 100644 index 0000000000..32be11b719 --- /dev/null +++ b/oceanbasev10writer/doc/oceanbasev10writer.md @@ -0,0 +1,363 @@ +## 1 快速介绍 +OceanBaseV10Writer 插件实现了写入数据到 OceanBase V1.0以及更高版本数据库的目的表的功能。在底层实现上, OceanbaseV10Writer 通过 java客户端(底层MySQL JDBC或oceanbase client) 连接obproxy远程 OceanBase 数据库,并执行相应的 insert .. on duplicate key update这条sql 语句将数据写入 OceanBase ,内部会分批次提交入库。 +Oceanbasev10Writer 面向ETL开发工程师,他们使用 Oceanbasev10Writer 从数仓导入数据到 Oceanbase。同时 Oceanbasev10Writer 亦可以作为数据迁移工具为DBA等用户提供服务。 + +注意,oceanbasewriter是ob 0.5的writer,oceanbasev10writer是ob 1.0及以后版本的writer。 + +## 2 实现原理 +Oceanbasev10Writer 通过 DataX 框架获取 Reader 生成的协议数据,生成insert ... 
on duplicate key update语句,在主键或唯一键冲突时,更新表中的所有字段。目前只有这一种行为,写入模式(只写入不更新)和更新指定字段目前暂未支持。 出于性能考虑,写入采用batch方式批量写,当行数累计到预定阈值时,才发起写入请求。 +插件连接ob使用Mysql/Oceanbase JDBC driver通过obproxy连接ob; + +## 3 功能说明 +### 3.1 配置样例 + +- 这里使用一份从内存产生到 Oceanbase 导入的数据。 +``` +{ + "job": { + "setting": { + "speed": { + "channel": 1 + }, + "errorLimit": { + "record": 1 + } + }, + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column" : [ + { + "value": "DataX", + "type": "string" + }, + { + "value": 19880808, + "type": "long" + }, + { + "value": "1988-08-08 08:08:08", + "type": "date" + }, + { + "value": true, + "type": "bool" + }, + { + "value": "test", + "type": "bytes" + } + ], + "sliceRecordCount": 1000 + } + }, + "writer": { + "name": "oceanbasev10writer", + "parameter": { + "obWriteMode": "update", + "column": [ + "id", + "name" + ], + "preSql": [ + "delete from test" + ], + "connection": [ + { + "jdbcUrl": "||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/dbName", + "table": [ + "test" + ] + } + ], + "username": "xxx", + "password":"xxx", + "batchSize": 256, + "memstoreThreshold": "0.9" + } + } + } + ] + } +} +``` +- 这里使用一份从内存产生到 Oceanbase 旁路导入的数据。 +``` +{ + "job": { + "setting": { + "speed": { + "channel": 1 + }, + "errorLimit": { + "record": 1 + } + }, + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column" : [ + { + "value": "DataX", + "type": "string" + }, + { + "value": 19880808, + "type": "long" + }, + { + "value": "1988-08-08 08:08:08", + "type": "date" + }, + { + "value": true, + "type": "bool" + }, + { + "value": "test", + "type": "bytes" + } + ], + "sliceRecordCount": 1000 + } + }, + "writer": { + "name": "oceanbasev10writer", + "parameter": { + "obWriteMode": "update", + "column": [ + "id", + "name" + ], + "preSql": [ + "delete from test" + ], + "connection": [ + { + "jdbcUrl": "||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/dbName", + "table": [ + "test" + ] + 
} + ], + "username": "xxx", + "password":"xxx", + "batchSize": 256, + "directPath": true, + "rpcPort": 2882, + "parallel": 8, + "heartBeatInterval": 1000, + "heartBeatTimeout": 6000, + "bufferSize": 1048576, + "memstoreThreshold": "0.9" + } + } + } + ] + } +} +``` +### 3.2 参数说明 + +- **jdbcUrl** + - 描述:连接ob使用的jdbc url,支持两种格式: + - ||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/db + - 此格式下username仅填写用户名本身,无需三段式写法 + - jdbc:mysql://ip:port/db + - 此格式下username需要三段式写法 + - 必选:是 + - 默认值:无 +- **table** + - 描述:目的表的表名称。开源版obwriter插件仅支持写入一个表。表名中一般不含库名; + - 必选:是 + - 默认值:无 +- **column** + - 描述:目的表需要写入数据的字段,字段之间用英文逗号分隔。例如: "column": ["id","name","age"]。 +``` +**column配置项必须指定,不能留空!** + 注意:1、我们强烈不推荐你这样配置,因为当你目的表字段个数、类型等有改动时,你的任务可能运行不正确或者失败 + 2、 column 不能配置任何常量值 +``` + +- 必选:是 +- 默认值:否 +- **preSql** + - 描述:写入数据到目的表前,会先执行这里的标准语句。如果 Sql 中有你需要操作到的表名称,请使用 `@table` 表示,这样在实际执行 Sql 语句时,会对变量按照实际表名称进行替换。比如你的任务是要写入到目的端的100个同构分表(表名称为:datax_00,datax01, ... datax_98,datax_99),并且你希望导入数据前,先对表中数据进行删除操作,那么你可以这样配置:`"preSql":["delete from @table"]`,效果是:在执行到每个表写入数据前,会先执行对应的 delete from 对应表名称.只支持delete语句 + - 必选:否 + - 默认值:无 +- **batchSize** + - 描述:一次性批量提交的记录数大小,该值可以极大减少DataX与Oceanbase的网络交互次数,并提升整体吞吐量。但是该值设置过大可能会造成DataX运行进程OOM情况。 + - 必选:否 + - 默认值:1000 +- **memstoreThreshold** + - 描述:OB租户的memstore使用率,当达到这个阀值的时候暂停导入,等释放内存后继续导入. 
防止租户内存溢出 + - 必选:否 + - 默认值:0.9 +- **username** + - 描述:访问oceanbase的用户名。注意当jdbcUrl配置为||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||这样的格式时,此处不配置ob的集群名和租户名。否则需要配置为三段式形式。 + - 必选:是 + - 默认值:无 +- **password** + - 描述:访问oceanbase的密码 + - 必选:是 + - 默认值:无 +- writerThreadCount + - 描述:每个通道(channel)中写入使用的线程数 + - 必选:否 + - 默认值:1 +- directPath + - 描述:开启旁路导入 + - 必选:否 + - 默认值:false +- rpcPort + - 描述:oceanbase的rpc端口 + - 必选:否 + - 默认值:无 +- parallel + - 描述:旁路导入的启用线程数 + - 必选:否 + - 默认值:1 +- bufferSize + - 描述:旁路导入的切分数据块大小 + - 必选:否 + - 默认值:1048576 +- heartBeatInterval + - 描述:旁路导入的心跳间隔 + - 必选:否 + - 默认值:1000 +- heartBeatTimeout + - 描述:旁路导入的心跳超时时间 + - 必选:否 + - 默认值:6000 +``` +**开启了旁路导入,即directPath:true时** + 注意:1、此时rpcPort为必填项。 + 2、设置parallel时,parallel和oceanbase的负载有关。 + 3、设置heartBeatTimeout最低不能低于6000,heartBeatInterval的值最低不能低于1000, + 当heartBeatTimeout和heartBeatInterval同时设置时,heartBeatTimeout-heartBeatInterval的差值不能低于4000。 + 4、bufferSize的单位为字节数,默认为1M,即1048576。 +``` + +## 4 常见问题 +### 4.1 连接断开导致写入失败 + +DataX写入ob的任务失败,在log中可以发现在写入ob时,连接被断开: +``` +2018-12-14 05:40:48.586 [18705170-3-17-writer] WARN CommonRdbmsWriter$Task - 遇到OB异常,回滚此次写入, 休眠 1秒,采用逐条写入提交,SQLState:S1000 +java.sql.SQLException: Could not retrieve transation read-only status server + at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:964) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:897) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:886) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:860) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:877) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:873) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.ConnectionImpl.isReadOnly(ConnectionImpl.java:3603) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at
com.mysql.jdbc.ConnectionImpl.isReadOnly(ConnectionImpl.java:3572) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.PreparedStatement.executeBatchInternal(PreparedStatement.java:1225) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.StatementImpl.executeBatch(StatementImpl.java:958) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.alibaba.datax.plugin.writer.oceanbasev10writer.task.MultiTableWriterTask.write(MultiTableWriterTask.java:357) [oceanbasev10writer-0.0.1-SNAPSHOT.jar:na] + at com.alibaba.datax.plugin.writer.oceanbasev10writer.task.MultiTableWriterTask.calcRuleAndDoBatchInsert(MultiTableWriterTask.java:338) [oceanbasev10writer-0.0.1-SNAPSHOT.jar:na] + at com.alibaba.datax.plugin.writer.oceanbasev10writer.task.MultiTableWriterTask.startWrite(MultiTableWriterTask.java:227) [oceanbasev10writer-0.0.1-SNAPSHOT.jar:na] + at com.alibaba.datax.plugin.writer.oceanbasev10writer.OceanBaseV10Writer$Task.startWrite(OceanBaseV10Writer.java:360) [oceanbasev10writer-0.0.1-SNAPSHOT.jar:na] + at com.alibaba.datax.core.taskgroup.runner.WriterRunner.run(WriterRunner.java:62) [datax-core-0.0.1-SNAPSHOT.jar:na] + at java.lang.Thread.run(Thread.java:834) [na:1.8.0_112] +Caused by: com.mysql.jdbc.exceptions.jdbc4.CommunicationsException: Communications link failure +The last packet successfully received from the server was 5 milliseconds ago. The last packet sent successfully to the server was 4 milliseconds ago. 
+ at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) ~[na:1.8.0_112] + at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) ~[na:1.8.0_112] + at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) ~[na:1.8.0_112] + at java.lang.reflect.Constructor.newInstance(Constructor.java:423) ~[na:1.8.0_112] + at com.mysql.jdbc.Util.handleNewInstance(Util.java:425) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.SQLError.createCommunicationsException(SQLError.java:989) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.MysqlIO.reuseAndReadPacket(MysqlIO.java:3556) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.MysqlIO.reuseAndReadPacket(MysqlIO.java:3456) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3897) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2524) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2677) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2545) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2503) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.StatementImpl.executeQuery(StatementImpl.java:1369) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.ConnectionImpl.isReadOnly(ConnectionImpl.java:3597) ~[mysql-connector-java-5.1.40.jar:5.1.40] + ... 9 common frames omitted +Caused by: java.io.EOFException: Can not read response from server. Expected to read 4 bytes, read 0 bytes before connection was unexpectedly lost. 
+ at com.mysql.jdbc.MysqlIO.readFully(MysqlIO.java:3008) ~[mysql-connector-java-5.1.40.jar:5.1.40] + at com.mysql.jdbc.MysqlIO.reuseAndReadPacket(MysqlIO.java:3466) ~[mysql-connector-java-5.1.40.jar:5.1.40] + ... 17 common frames omitted +``` +关键字:could not retrieve transation status from read-only status server, communication link failure +检查运行Data X任务的机器,发现obproxy在任务运行时发生若干次重启: + +在第一次obproxy退出的日志里,找到退出原因: +``` +[2018-12-14 05:40:47.611683] ERROR [PROXY] do_monitor_mem (ob_proxy_main.cpp:889) [7262][Y0-7F4480213880] [AL=47391-47390-29] obproxy's memroy is out of limit, will be going to commit suicide(mem_limited=838860800, OTHER_MEMORY_SIZE=73400320, is_out_of_mem_limit=true, cur_pos=9) BACKTRACE:0x49db91 0x47fdc9 0x43b115 0x43ee5d 0xa6e623 0xe401b2 0xe3f497 0x4f674c 0x7f4487ace77d 0x7f44865ed9ad +[2018-12-14 05:40:47.612334] ERROR [PROXY] do_monitor_mem (ob_proxy_main.cpp:891) [7262][Y0-7F4480213880] [AL=47392-47391-651] history memory size, history_mem_size[0]=765460480 BACKTRACE:0x49db91 0x47fdc9 0x48717a 0x43f121 0xa6e623 0xe401b2 0xe3f497 0x4f674c 0x7f4487ace77d 0x7f44865ed9ad +[2018-12-14 05:40:47.612934] ERROR [PROXY] do_monitor_mem (ob_proxy_main.cpp:891) [7262][Y0-7F4480213880] [AL=47393-47392-600] history memory size, history_mem_size[1]=765460480 BACKTRACE:0x49db91 0x47fdc9 0x48717a 0x43f121 0xa6e623 0xe401b2 0xe3f497 0x4f674c 0x7f4487ace77d 0x7f44865ed9ad +[2018-12-14 05:40:47.613530] ERROR [PROXY] do_monitor_mem (ob_proxy_main.cpp:891) [7262][Y0-7F4480213880] [AL=47394-47393-596] history memory size, history_mem_size[2]=765460480 BACKTRACE:0x49db91 0x47fdc9 0x48717a 0x43f121 0xa6e623 0xe401b2 0xe3f497 0x4f674c 0x7f4487ace77d 0x7f44865ed9ad +[2018-12-14 05:40:47.614121] ERROR [PROXY] do_monitor_mem (ob_proxy_main.cpp:891) [7262][Y0-7F4480213880] [AL=47395-47394-591] history memory size, history_mem_size[3]=765460480 BACKTRACE:0x49db91 0x47fdc9 0x48717a 0x43f121 0xa6e623 0xe401b2 0xe3f497 0x4f674c 0x7f4487ace77d 0x7f44865ed9ad +[2018-12-14 
05:40:47.614717] ERROR [PROXY] do_monitor_mem (ob_proxy_main.cpp:891) [7262][Y0-7F4480213880] [AL=47396-47395-596] history memory size, history_mem_size[4]=765460480 BACKTRACE:0x49db91 0x47fdc9 0x48717a 0x43f121 0xa6e623 0xe401b2 0xe3f497 0x4f674c 0x7f4487ace77d 0x7f44865ed9ad +[2018-12-14 05:40:47.615307] ERROR [PROXY] do_monitor_mem (ob_proxy_main.cpp:891) [7262][Y0-7F4480213880] [AL=47397-47396-590] history memory size, history_mem_size[5]=765460480 BACKTRACE:0x49db91 0x47fdc9 0x48717a 0x43f121 0xa6e623 0xe401b2 0xe3f497 0x4f674c 0x7f4487ace77d 0x7f44865ed9ad +``` +关键字:obproxy's memroy is out of limit, will be going to commit suicide +可以看到,obproxy由于内存不足退出。 +#### 解决方案 +obproxy在启动时, 可以指定使用内存上限,默认是800M,在某些情况下,比如连接数较多(该失败的任务为写入100张分表,并发数32,因此连接数为3200),可能会导致obproxy内存不够用。要解决该问题,一方面可以调低任务的并发数,另一方面可以调大obproxy的内存限制,比如调整至2G。 + +### 4.2 Session interrupted +在使用ob 1.0 writer往单表里写入数据时,遇到以下错误: + +``` +2019-01-03 19:37:27.197 [0-insertTask-73] WARN InsertTask - Insert fatal error SqlState =HY000, errorCode = 5066, java.sql.SQLException: Session interrupted, server ip:port[11.145.28.93:2881] +``` +关键字:fatal,Session interrupted,server ip:port +在任务执行的log中,还可以发现如下log: + +``` +2019-08-09 11:56:56.758 [2-insertTask-82] ERROR StdoutPluginCollector - +java.sql.SQLException: Session interrupted, server ip:port[11.232.58.16:2881] + at com.alipay.oceanbase.obproxy.connection.ObGroupConnection.checkAndThrowException(ObGroupConnection.java:431) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.statement.ObStatement.doExecute(ObStatement.java:598) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.statement.ObStatement.execute(ObStatement.java:456) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.statement.ObPreparedStatement.execute(ObPreparedStatement.java:148) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at 
com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter$Task.doOneInsert(CommonRdbmsWriter.java:430) ~[plugin-rdbms-util-0.0.1-SNAPSHOT.jar:na] + at com.alibaba.datax.plugin.writer.oceanbasev10writer.task.InsertTask.doMultiInsert(InsertTask.java:196) [oceanbasev10writer-0.0.1-SNAPSHOT.jar:na] + at com.alibaba.datax.plugin.writer.oceanbasev10writer.task.InsertTask.run(InsertTask.java:85) [oceanbasev10writer-0.0.1-SNAPSHOT.jar:na] + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1147) [na:1.8.0_112] + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:622) [na:1.8.0_112] + at java.lang.Thread.run(Thread.java:834) [na:1.8.0_112] +Caused by: com.alipay.oceanbase.obproxy.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: INSERT command denied to user 'dwexp'@'%' for table 'mobile_product_version_info' + at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) ~[na:1.8.0_112] + at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) ~[na:1.8.0_112] + at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) ~[na:1.8.0_112] + at java.lang.reflect.Constructor.newInstance(Constructor.java:423) ~[na:1.8.0_112] + at com.alipay.oceanbase.obproxy.mysql.jdbc.Util.handleNewInstance(Util.java:409) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.mysql.jdbc.Util.getInstance(Util.java:384) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.mysql.jdbc.SQLError.createSQLException(SQLError.java:1052) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:4403) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:4275) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at 
com.alipay.oceanbase.obproxy.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2706) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2867) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2843) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.mysql.jdbc.PreparedStatement.executeInternal(PreparedStatement.java:2085) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.mysql.jdbc.PreparedStatement.execute(PreparedStatement.java:1310) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.druid.pool.DruidPooledPreparedStatement.execute(DruidPooledPreparedStatement.java:493) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.statement.ObPreparedStatement.executeOnConnection(ObPreparedStatement.java:121) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.statement.ObStatement.doExecuteOnConnection(ObStatement.java:677) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + at com.alipay.oceanbase.obproxy.statement.ObStatement.doExecute(ObStatement.java:558) ~[oceanbase-connector-java-2.0.8.20180730.jar:na] + ... 
8 common frames omitted +``` +可以看到,异常是由于没有insert权限(INSERT command denied to user 'dwexp'@'%' for table)引起的。 + +关键字:INSERT command denied to user 'dwexp'@'%' +可以看到这个错误是由于没有写入权限导致的,因此在observer的log、obproxy的log中都没有相关的信息。 +#### 解决方案 +在ob中给相关用户授权之后,任务重试即可成功。 + +参考授权命令为: +```sql +grant select, insert, update on dbName.tableName to dwexp; +grant select on oceanbase.gv$memstore to dwexp; +``` diff --git a/oceanbasev10writer/pom.xml b/oceanbasev10writer/pom.xml index 11997a1e3f..d19864012f 100644 --- a/oceanbasev10writer/pom.xml +++ b/oceanbasev10writer/pom.xml @@ -28,6 +28,16 @@ com.alibaba.datax plugin-rdbms-util ${datax-project-version} + + + guava + com.google.guava + + + com.alibaba + druid + + org.slf4j @@ -43,13 +53,11 @@ 4.0.4.RELEASE test - com.alipay.oceanbase @@ -64,6 +72,19 @@ + + + com.oceanbase + oceanbase-client + 2.4.11 + + + com.google.guava + guava + + + + com.oceanbase shade-ob-partition-calculator @@ -72,8 +93,13 @@ ${pom.basedir}/src/main/libs/shade-ob-partition-calculator-1.0-SNAPSHOT.jar - + com.google.guava + guava + 27.0-jre + + + log4j log4j 1.2.16 @@ -89,6 +115,157 @@ 4.11 test + + com.oceanbase + obkv-table-client + 1.4.0 + + + com.oceanbase + obkv-hbase-client + 2.1.0 + + + com.alibaba + fastjson + + + + org.slf4j + slf4j-api + + + + com.oceanbase + oceanbase-client + + + + com.google.guava + guava + + + commons-lang + commons-lang + + + com.alipay.sofa.common + sofa-common-tools + + + + io.netty + netty-codec-dns + + + io.netty + netty-codec-http + + + io.netty + netty-codec-http2 + + + io.netty + netty-codec-haproxy + + + io.netty + netty-codec-mqtt + + + io.netty + netty-codec-memcache + + + io.netty + netty-codec-redis + + + io.netty + netty-codec-smtp + + + io.netty + netty-codec-socks + + + io.netty + netty-codec-stomp + + + io.netty + netty-codec-xml + + + + io.netty + netty-handler-proxy + + + io.netty + netty-handler-ssl-ocsp + + + + io.netty + netty-resolver-dns + + + io.netty + netty-resolver-dns-classes-macos + + + io.netty + 
netty-resolver-dns-native-macos + + + + io.netty + netty-transport-rxtx + + + io.netty + netty-transport-udt + + + io.netty + netty-transport-sctp + + + + + com.alipay.sofa.common + sofa-common-tools + 1.3.11 + + + + org.slf4j + slf4j-api + + + + com.google.guava + guava + + + + + com.alibaba + fastjson + 1.2.83 + + + commons-lang + commons-lang + 2.6 + + + mysql + mysql-connector-java + ${mysql.driver.version} + diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/Config.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/Config.java index 6776196b52..a90de0a399 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/Config.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/Config.java @@ -2,62 +2,86 @@ public interface Config { - String MEMSTORE_THRESHOLD = "memstoreThreshold"; + String MEMSTORE_THRESHOLD = "memstoreThreshold"; - double DEFAULT_MEMSTORE_THRESHOLD = 0.9d; + double DEFAULT_MEMSTORE_THRESHOLD = 0.9d; - double DEFAULT_SLOW_MEMSTORE_THRESHOLD = 0.75d; - String MEMSTORE_CHECK_INTERVAL_SECOND = "memstoreCheckIntervalSecond"; + double DEFAULT_SLOW_MEMSTORE_THRESHOLD = 0.75d; - long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30; + String MEMSTORE_CHECK_INTERVAL_SECOND = "memstoreCheckIntervalSecond"; - int DEFAULT_BATCH_SIZE = 100; - int MAX_BATCH_SIZE = 4096; + long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30; - String FAIL_TRY_COUNT = "failTryCount"; + int DEFAULT_BATCH_SIZE = 100; - int DEFAULT_FAIL_TRY_COUNT = 10000; + int MAX_BATCH_SIZE = 4096; - String WRITER_THREAD_COUNT = "writerThreadCount"; + String FAIL_TRY_COUNT = "failTryCount"; - int DEFAULT_WRITER_THREAD_COUNT = 1; + int DEFAULT_FAIL_TRY_COUNT = 10000; - String CONCURRENT_WRITE = "concurrentWrite"; + String WRITER_THREAD_COUNT = "writerThreadCount"; - boolean DEFAULT_CONCURRENT_WRITE = true; + int DEFAULT_WRITER_THREAD_COUNT = 
1; - String OB_VERSION = "obVersion"; - String TIMEOUT = "timeout"; + String CONCURRENT_WRITE = "concurrentWrite"; - String PRINT_COST = "printCost"; - boolean DEFAULT_PRINT_COST = false; + boolean DEFAULT_CONCURRENT_WRITE = true; - String COST_BOUND = "costBound"; - long DEFAULT_COST_BOUND = 20; + String OB_VERSION = "obVersion"; - String MAX_ACTIVE_CONNECTION = "maxActiveConnection"; - int DEFAULT_MAX_ACTIVE_CONNECTION = 2000; + String TIMEOUT = "timeout"; - String WRITER_SUB_TASK_COUNT = "writerSubTaskCount"; - int DEFAULT_WRITER_SUB_TASK_COUNT = 1; - int MAX_WRITER_SUB_TASK_COUNT = 4096; + String PRINT_COST = "printCost"; + + boolean DEFAULT_PRINT_COST = false; + + String COST_BOUND = "costBound"; + + long DEFAULT_COST_BOUND = 20; + + String MAX_ACTIVE_CONNECTION = "maxActiveConnection"; + + int DEFAULT_MAX_ACTIVE_CONNECTION = 2000; + + String WRITER_SUB_TASK_COUNT = "writerSubTaskCount"; + + int DEFAULT_WRITER_SUB_TASK_COUNT = 1; + + int MAX_WRITER_SUB_TASK_COUNT = 4096; + + String OB_WRITE_MODE = "obWriteMode"; - String OB_WRITE_MODE = "obWriteMode"; String OB_COMPATIBLE_MODE = "obCompatibilityMode"; + String OB_COMPATIBLE_MODE_ORACLE = "ORACLE"; + String OB_COMPATIBLE_MODE_MYSQL = "MYSQL"; - String OCJ_GET_CONNECT_TIMEOUT = "ocjGetConnectTimeout"; - int DEFAULT_OCJ_GET_CONNECT_TIMEOUT = 5000; // 5s + String OCJ_GET_CONNECT_TIMEOUT = "ocjGetConnectTimeout"; + + int DEFAULT_OCJ_GET_CONNECT_TIMEOUT = 5000; // 5s + + String OCJ_PROXY_CONNECT_TIMEOUT = "ocjProxyConnectTimeout"; + + int DEFAULT_OCJ_PROXY_CONNECT_TIMEOUT = 5000; // 5s + + String OCJ_CREATE_RESOURCE_TIMEOUT = "ocjCreateResourceTimeout"; + + int DEFAULT_OCJ_CREATE_RESOURCE_TIMEOUT = 60000; // 60s + + String OB_UPDATE_COLUMNS = "obUpdateColumns"; + + String USE_PART_CALCULATOR = "usePartCalculator"; + + boolean DEFAULT_USE_PART_CALCULATOR = false; - String OCJ_PROXY_CONNECT_TIMEOUT = "ocjProxyConnectTimeout"; - int DEFAULT_OCJ_PROXY_CONNECT_TIMEOUT = 5000; // 5s + String BLOCKS_COUNT = "blocksCount"; 
- String OCJ_CREATE_RESOURCE_TIMEOUT = "ocjCreateResourceTimeout"; - int DEFAULT_OCJ_CREATE_RESOURCE_TIMEOUT = 60000; // 60s + String DIRECT_PATH = "directPath"; - String OB_UPDATE_COLUMNS = "obUpdateColumns"; + String RPC_PORT = "rpcPort"; - String USE_PART_CALCULATOR = "usePartCalculator"; - boolean DEFAULT_USE_PART_CALCULATOR = false; + // 区别于recordLimit,这个参数仅针对某张表。即一张表超过最大错误数不会影响其他表。仅用于旁路导入。 + String MAX_ERRORS = "maxErrors"; } diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/common/Table.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/common/Table.java new file mode 100644 index 0000000000..53c7025432 --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/common/Table.java @@ -0,0 +1,88 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.common; + +import java.util.Objects; + +public class Table { + private String tableName; + private String dbName; + private Throwable error; + private Status status; + + public Table(String dbName, String tableName) { + this.dbName = dbName; + this.tableName = tableName; + this.status = Status.INITIAL; + } + + public Throwable getError() { + return error; + } + + public void setError(Throwable error) { + this.error = error; + } + + public Status getStatus() { + return status; + } + + public void setStatus(Status status) { + this.status = status; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Table table = (Table) o; + return tableName.equals(table.tableName) && dbName.equals(table.dbName); + } + + @Override + public int hashCode() { + return Objects.hash(tableName, dbName); + } + + public enum Status { + /** + * + */ + INITIAL(0), + + /** + * + */ + RUNNING(1), + + /** + * + */ + FAILURE(2), + + /** + * + */ + SUCCESS(3); + + private int code; + + /** + * @param 
code + */ + private Status(int code) { + this.code = code; + } + + public int getCode() { + return code; + } + + public void setCode(int code) { + this.code = code; + } + } +} \ No newline at end of file diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/common/TableCache.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/common/TableCache.java new file mode 100644 index 0000000000..c59dca0653 --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/common/TableCache.java @@ -0,0 +1,21 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.common; + +import java.util.concurrent.ConcurrentHashMap; + +public class TableCache { + private static final TableCache INSTANCE = new TableCache(); + private final ConcurrentHashMap TABLE_CACHE; + + private TableCache() { + TABLE_CACHE = new ConcurrentHashMap<>(); + } + + public static TableCache getInstance() { + return INSTANCE; + } + + public Table getTable(String dbName, String tableName) { + String fullTableName = String.join("-", dbName, tableName); + return TABLE_CACHE.computeIfAbsent(fullTableName, (k) -> new Table(dbName, tableName)); + } +} \ No newline at end of file diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/AbstractRestrictedConnection.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/AbstractRestrictedConnection.java new file mode 100644 index 0000000000..717e2e36fa --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/AbstractRestrictedConnection.java @@ -0,0 +1,257 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath; + +import java.sql.Array; +import java.sql.Blob; +import java.sql.CallableStatement; +import java.sql.Clob; +import java.sql.DatabaseMetaData; +import 
java.sql.NClob; +import java.sql.PreparedStatement; +import java.sql.SQLClientInfoException; +import java.sql.SQLException; +import java.sql.SQLWarning; +import java.sql.SQLXML; +import java.sql.Savepoint; +import java.sql.Statement; +import java.sql.Struct; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.Executor; + +public abstract class AbstractRestrictedConnection implements java.sql.Connection { + + @Override + public CallableStatement prepareCall(String sql) throws SQLException { + throw new UnsupportedOperationException("prepareCall(String) is unsupported"); + } + + @Override + public String nativeSQL(String sql) throws SQLException { + throw new UnsupportedOperationException("nativeSQL(String) is unsupported"); + } + + @Override + public void setAutoCommit(boolean autoCommit) throws SQLException { + throw new UnsupportedOperationException("setAutoCommit(boolean) is unsupported"); + } + + @Override + public boolean getAutoCommit() throws SQLException { + throw new UnsupportedOperationException("getAutoCommit is unsupported"); + } + + @Override + public void abort(Executor executor) throws SQLException { + throw new UnsupportedOperationException("abort(Executor) is unsupported"); + } + + @Override + public void setNetworkTimeout(Executor executor, int milliseconds) throws SQLException { + throw new UnsupportedOperationException("setNetworkTimeout(Executor, int) is unsupported"); + } + + @Override + public int getNetworkTimeout() throws SQLException { + throw new UnsupportedOperationException("getNetworkTimeout is unsupported"); + } + + @Override + public DatabaseMetaData getMetaData() throws SQLException { + throw new UnsupportedOperationException("getMetaData is unsupported"); + } + + @Override + public void setReadOnly(boolean readOnly) throws SQLException { + throw new UnsupportedOperationException("setReadOnly(boolean) is unsupported"); + } + + @Override + public boolean isReadOnly() throws SQLException { + throw new 
UnsupportedOperationException("isReadOnly is unsupported"); + } + + @Override + public void setCatalog(String catalog) throws SQLException { + throw new UnsupportedOperationException("setCatalog(String) is unsupported"); + } + + @Override + public String getCatalog() throws SQLException { + throw new UnsupportedOperationException("getCatalog is unsupported"); + } + + @Override + public void setTransactionIsolation(int level) throws SQLException { + throw new UnsupportedOperationException("setTransactionIsolation(int) is unsupported"); + } + + @Override + public int getTransactionIsolation() throws SQLException { + throw new UnsupportedOperationException("getTransactionIsolation is unsupported"); + } + + @Override + public SQLWarning getWarnings() throws SQLException { + throw new UnsupportedOperationException("getWarnings is unsupported"); + } + + @Override + public void clearWarnings() throws SQLException { + throw new UnsupportedOperationException("clearWarnings is unsupported"); + } + + @Override + public Statement createStatement(int resultSetType, int resultSetConcurrency) throws SQLException { + throw new UnsupportedOperationException("createStatement(int, int) is unsupported"); + } + + @Override + public PreparedStatement prepareStatement(String sql, int resultSetType, int resultSetConcurrency) throws SQLException { + throw new UnsupportedOperationException("prepareStatement(String, int, int) is unsupported"); + } + + @Override + public CallableStatement prepareCall(String sql, int resultSetType, int resultSetConcurrency) throws SQLException { + throw new UnsupportedOperationException("prepareCall(String, int, int) is unsupported"); + } + + @Override + public Map> getTypeMap() throws SQLException { + throw new UnsupportedOperationException("getTypeMap is unsupported"); + } + + @Override + public void setTypeMap(Map> map) throws SQLException { + throw new UnsupportedOperationException("setTypeMap(Map>) is unsupported"); + } + + @Override + public void 
setHoldability(int holdability) throws SQLException { + throw new UnsupportedOperationException("setHoldability is unsupported"); + } + + @Override + public int getHoldability() throws SQLException { + throw new UnsupportedOperationException("getHoldability is unsupported"); + } + + @Override + public Savepoint setSavepoint() throws SQLException { + throw new UnsupportedOperationException("setSavepoint is unsupported"); + } + + @Override + public Savepoint setSavepoint(String name) throws SQLException { + throw new UnsupportedOperationException("setSavepoint(String) is unsupported"); + } + + @Override + public void rollback(Savepoint savepoint) throws SQLException { + throw new UnsupportedOperationException("rollback(Savepoint) is unsupported"); + } + + @Override + public void releaseSavepoint(Savepoint savepoint) throws SQLException { + throw new UnsupportedOperationException("releaseSavepoint(Savepoint) is unsupported"); + } + + @Override + public Statement createStatement(int resultSetType, int resultSetConcurrency, int resultSetHoldability) throws SQLException { + throw new UnsupportedOperationException("createStatement(int, int, int) is unsupported"); + } + + @Override + public PreparedStatement prepareStatement(String sql, int resultSetType, int resultSetConcurrency, int resultSetHoldability) throws SQLException { + throw new UnsupportedOperationException("prepareStatement(String, int, int, int) is unsupported"); + } + + @Override + public CallableStatement prepareCall(String sql, int resultSetType, int resultSetConcurrency, int resultSetHoldability) throws SQLException { + throw new UnsupportedOperationException("prepareCall(String, int, int, int) is unsupported"); + } + + @Override + public PreparedStatement prepareStatement(String sql, int autoGeneratedKeys) throws SQLException { + throw new UnsupportedOperationException("prepareStatement(String, int) is unsupported"); + } + + @Override + public PreparedStatement prepareStatement(String sql, int[] 
columnIndexes) throws SQLException { + throw new UnsupportedOperationException("prepareStatement(String, int[]) is unsupported"); + } + + @Override + public PreparedStatement prepareStatement(String sql, String[] columnNames) throws SQLException { + throw new UnsupportedOperationException("prepareStatement(String, String[]) is unsupported"); + } + + @Override + public Clob createClob() throws SQLException { + throw new UnsupportedOperationException("createClob is unsupported"); + } + + @Override + public Blob createBlob() throws SQLException { + throw new UnsupportedOperationException("createBlob is unsupported"); + } + + @Override + public NClob createNClob() throws SQLException { + throw new UnsupportedOperationException("createNClob is unsupported"); + } + + @Override + public SQLXML createSQLXML() throws SQLException { + throw new UnsupportedOperationException("createSQLXML is unsupported"); + } + + @Override + public boolean isValid(int timeout) throws SQLException { + throw new UnsupportedOperationException("isValid(int) is unsupported"); + } + + @Override + public void setClientInfo(String name, String value) throws SQLClientInfoException { + throw new UnsupportedOperationException("setClientInfo(String, String) is unsupported"); + } + + @Override + public void setClientInfo(Properties properties) throws SQLClientInfoException { + throw new UnsupportedOperationException("setClientInfo(Properties) is unsupported"); + } + + @Override + public String getClientInfo(String name) throws SQLException { + throw new UnsupportedOperationException("getClientInfo(String) is unsupported"); + } + + @Override + public Properties getClientInfo() throws SQLException { + throw new UnsupportedOperationException("getClientInfo is unsupported"); + } + + @Override + public Array createArrayOf(String typeName, Object[] elements) throws SQLException { + throw new UnsupportedOperationException("createArrayOf(String, Object[]) is unsupported"); + } + + @Override + public Struct 
createStruct(String typeName, Object[] attributes) throws SQLException { + throw new UnsupportedOperationException("createStruct(String, Object[]) is unsupported"); + } + + @Override + public void setSchema(String schema) throws SQLException { + throw new UnsupportedOperationException("setSchema(String) is unsupported"); + } + + @Override + public T unwrap(Class iface) throws SQLException { + throw new UnsupportedOperationException("unwrap(Class) is unsupported"); + } + + @Override + public boolean isWrapperFor(Class> iface) throws SQLException { + throw new UnsupportedOperationException("isWrapperFor(Class>) is unsupported"); + } +} \ No newline at end of file diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/AbstractRestrictedPreparedStatement.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/AbstractRestrictedPreparedStatement.java new file mode 100644 index 0000000000..47b2c98ba1 --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/AbstractRestrictedPreparedStatement.java @@ -0,0 +1,663 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath; + +import java.io.InputStream; +import java.io.Reader; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.net.URL; +import java.nio.charset.Charset; +import java.sql.Array; +import java.sql.Blob; +import java.sql.Clob; +import java.sql.Date; +import java.sql.NClob; +import java.sql.ParameterMetaData; +import java.sql.Ref; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.RowId; +import java.sql.SQLException; +import java.sql.SQLWarning; +import java.sql.SQLXML; +import java.sql.Time; +import java.sql.Timestamp; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.OffsetDateTime; +import 
java.time.OffsetTime; +import java.time.ZonedDateTime; +import java.util.Calendar; +import java.util.List; + +import com.alipay.oceanbase.rpc.protocol.payload.impl.ObObj; +import com.alipay.oceanbase.rpc.protocol.payload.impl.ObObjType; +import com.alipay.oceanbase.rpc.util.ObVString; +import org.apache.commons.io.IOUtils; + +public abstract class AbstractRestrictedPreparedStatement implements java.sql.PreparedStatement { + + private boolean closed; + + @Override + public void setNull(int parameterIndex, int sqlType) throws SQLException { + this.setParameter(parameterIndex, createObObj(null)); + } + + @Override + public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException { + throw new UnsupportedOperationException("setNull(int, int, String) is unsupported"); + } + + @Override + public void setBoolean(int parameterIndex, boolean x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setByte(int parameterIndex, byte x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setShort(int parameterIndex, short x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setInt(int parameterIndex, int x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setLong(int parameterIndex, long x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setFloat(int parameterIndex, float x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setDouble(int parameterIndex, double x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setBigDecimal(int parameterIndex, BigDecimal x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void 
setString(int parameterIndex, String x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setBytes(int parameterIndex, byte[] x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setDate(int parameterIndex, Date x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setDate(int parameterIndex, Date x, Calendar cal) throws SQLException { + throw new UnsupportedOperationException("setDate(int, Date, Calendar) is unsupported"); + } + + @Override + public void setTime(int parameterIndex, Time x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException { + throw new UnsupportedOperationException("setTime(int, Time, Calendar) is unsupported"); + } + + @Override + public void setTimestamp(int parameterIndex, Timestamp x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws SQLException { + throw new UnsupportedOperationException("setTimestamp(int, Timestamp, Calendar) is unsupported"); + } + + @Override + public void setObject(int parameterIndex, Object x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setObject(int parameterIndex, Object x, int targetSqlType) throws SQLException { + throw new UnsupportedOperationException("setObject(int, Object, int) is unsupported"); + } + + @Override + public void setObject(int parameterIndex, Object x, int targetSqlType, int scaleOrLength) throws SQLException { + throw new UnsupportedOperationException("setObject(int, Object, int, int) is unsupported"); + } + + @Override + public void setRef(int parameterIndex, Ref x) throws SQLException { + throw new 
UnsupportedOperationException("setRef(int, Ref) is unsupported"); + } + + @Override + public void setArray(int parameterIndex, Array x) throws SQLException { + throw new UnsupportedOperationException("setArray(int, Array) is unsupported"); + } + + @Override + public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException { + throw new UnsupportedOperationException("setSQLXML(int, SQLXML) is unsupported"); + } + + @Override + public void setURL(int parameterIndex, URL x) throws SQLException { + // if (x == null) { + // this.setParameter(parameterIndex, createObObj(x)); + // } else { + // // TODO If need BackslashEscapes and character encoding ? + // this.setParameter(parameterIndex, createObObj(x.toString())); + // } + throw new UnsupportedOperationException("setURL(int, URL) is unsupported"); + } + + @Override + public void setRowId(int parameterIndex, RowId x) throws SQLException { + throw new UnsupportedOperationException("setRowId(int, RowId) is unsupported"); + } + + @Override + public void setNString(int parameterIndex, String value) throws SQLException { + this.setParameter(parameterIndex, createObObj(value)); + } + + @Override + public void setBlob(int parameterIndex, Blob x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setBlob(int parameterIndex, InputStream x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setBlob(int parameterIndex, InputStream x, long length) throws SQLException { + throw new UnsupportedOperationException("setBlob(int, InputStream, length) is unsupported"); + } + + @Override + public void setClob(int parameterIndex, Clob x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setClob(int parameterIndex, Reader x) throws SQLException { + this.setCharacterStream(parameterIndex, x); + } + + @Override + public void setClob(int parameterIndex, Reader x, 
long length) throws SQLException { + throw new UnsupportedOperationException("setClob(int, Reader, length) is unsupported"); + } + + @Override + public void setNClob(int parameterIndex, NClob x) throws SQLException { + this.setClob(parameterIndex, (Clob) (x)); + } + + @Override + public void setNClob(int parameterIndex, Reader x) throws SQLException { + this.setClob(parameterIndex, x); + } + + @Override + public void setNClob(int parameterIndex, Reader x, long length) throws SQLException { + throw new UnsupportedOperationException("setNClob(int, Reader, length) is unsupported"); + } + + @Override + public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Deprecated + @Override + public void setUnicodeStream(int parameterIndex, InputStream x, int length) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setAsciiStream(int parameterIndex, InputStream x, int length) throws SQLException { + throw new UnsupportedOperationException("setAsciiStream(int, InputStream, length) is unsupported"); + } + + @Override + public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException { + throw new UnsupportedOperationException("setAsciiStream(int, InputStream, length) is unsupported"); + } + + @Override + public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setBinaryStream(int parameterIndex, InputStream x, int length) throws SQLException { + throw new UnsupportedOperationException("setBinaryStream(int, InputStream, length) is unsupported"); + } + + @Override + public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException { + throw new UnsupportedOperationException("setBinaryStream(int, InputStream, length) is unsupported"); + } + + @Override + public void 
setCharacterStream(int parameterIndex, Reader x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setCharacterStream(int parameterIndex, Reader x, int length) throws SQLException { + throw new UnsupportedOperationException("setCharacterStream(int, InputStream, length) is unsupported"); + } + + @Override + public void setCharacterStream(int parameterIndex, Reader x, long length) throws SQLException { + throw new UnsupportedOperationException("setCharacterStream(int, InputStream, length) is unsupported"); + } + + @Override + public void setNCharacterStream(int parameterIndex, Reader x) throws SQLException { + this.setParameter(parameterIndex, createObObj(x)); + } + + @Override + public void setNCharacterStream(int parameterIndex, Reader x, long length) throws SQLException { + throw new UnsupportedOperationException("setNCharacterStream(int, InputStream, length) is unsupported"); + } + + /** + * @return boolean + */ + protected abstract boolean isOracleMode(); + + /** + * Set parameter to the target position. + * + * @param parameterIndex + * @param obObj + * @throws SQLException + */ + protected abstract void setParameter(int parameterIndex, ObObj obObj) throws SQLException; + + /** + * Close the current prepared statement. + * + * @throws SQLException + */ + @Override + public void close() throws SQLException { + this.closed = true; + } + + /** + * Return whether the current prepared statement is closed? + * + * @return boolean + * @throws SQLException + */ + @Override + public boolean isClosed() throws SQLException { + return this.closed; + } + + /** + * Create a {@link ObObj } array with input values. 
+ * + * @param values Original row value + * @return ObObj[] + */ + public ObObj[] createObObjArray(Object[] values) { + if (values == null) { + return null; + } + ObObj[] array = new ObObj[values.length]; + for (int i = 0; i < values.length; i++) { + array[i] = createObObj(values[i]); + } + return array; + } + + /** + * Create a {@link ObObj } array with input values. + * + * @param values Original row value + * @return ObObj[] + */ + public ObObj[] createObObjArray(List values) { + if (values == null) { + return null; + } + ObObj[] array = new ObObj[values.size()]; + for (int i = 0; i < values.size(); i++) { + array[i] = createObObj(values.get(i)); + } + return array; + } + + /** + * Create a {@link ObObj } instance. + * + * @param value Original column value + * @return ObObj + */ + public ObObj createObObj(Object value) { + try { + // Only used for strongly typed declared variables + Object convertedValue = value == null ? null : convertValue(value); + return new ObObj(ObObjType.defaultObjMeta(convertedValue), convertedValue); + } catch (Exception ex) { + throw new IllegalArgumentException(ex); + } + } + + /** + * Some values with data type is unsupported by ObObjType#valueOfType. + * We should convert the input value to supported value data type. + * + * @param value + * @return Object + * @throws Exception + */ + public static Object convertValue(Object value) throws Exception { + if (value instanceof BigDecimal) { + return value.toString(); + } else if (value instanceof BigInteger) { + return value.toString(); + } else if (value instanceof Instant) { + return Timestamp.from(((Instant) value)); + } else if (value instanceof LocalDate) { + // Warn: java.sql.Date.valueOf() is deprecated. As local zone is used. + return Date.valueOf(((LocalDate) value)); + } else if (value instanceof LocalTime) { + // Warn: java.sql.Time.valueOf() is deprecated. 
+ Time t = Time.valueOf((LocalTime) value); + return new Timestamp(t.getTime()); + } else if (value instanceof LocalDateTime) { + return Timestamp.valueOf(((LocalDateTime) value)); + } else if (value instanceof OffsetDateTime) { + return Timestamp.from(((OffsetDateTime) value).toInstant()); + } else if (value instanceof Time) { + return new Timestamp(((Time) value).getTime()); + } else if (value instanceof ZonedDateTime) { + // Note: Be care of time zone!!! + return Timestamp.from(((ZonedDateTime) value).toInstant()); + } else if (value instanceof OffsetTime) { + LocalTime lt = ((OffsetTime) value).toLocalTime(); + // Warn: java.sql.Time.valueOf() is deprecated. + return new Timestamp(Time.valueOf(lt).getTime()); + } else if (value instanceof InputStream) { + try (InputStream is = ((InputStream) value)) { + // Note: Be care of character set!!! + return new ObVString(IOUtils.toString(is, Charset.defaultCharset())); + } + } else if (value instanceof Blob) { + Blob b = (Blob) value; + try (InputStream is = b.getBinaryStream()) { + if (is == null) { + return null; + } + // Note: Be care of character set!!! + return new ObVString(IOUtils.toString(is, Charset.defaultCharset())); + } finally { + b.free(); + } + } else if (value instanceof Reader) { + try (Reader r = ((Reader) value)) { + return IOUtils.toString(r); + } + } else if (value instanceof Clob) { + Clob c = (Clob) value; + try (Reader r = c.getCharacterStream()) { + return r == null ? 
null : IOUtils.toString(r); + } finally { + c.free(); + } + } else { + return value; + } + } + + // *********************************************************************************** // + + @Override + public boolean getMoreResults(int current) throws SQLException { + throw new UnsupportedOperationException("getMoreResults(int) is unsupported"); + } + + @Override + public ResultSet getGeneratedKeys() throws SQLException { + throw new UnsupportedOperationException("getGeneratedKeys is unsupported"); + } + + @Override + public int executeUpdate(String sql, int autoGeneratedKeys) throws SQLException { + throw new UnsupportedOperationException("executeUpdate(String, int) is unsupported"); + } + + @Override + public int executeUpdate(String sql, int[] columnIndexes) throws SQLException { + throw new UnsupportedOperationException("executeUpdate(String, int[]) is unsupported"); + } + + @Override + public int executeUpdate(String sql, String[] columnNames) throws SQLException { + throw new UnsupportedOperationException("executeUpdate(String, String[]) is unsupported"); + } + + @Override + public boolean execute(String sql, int autoGeneratedKeys) throws SQLException { + throw new UnsupportedOperationException("execute(String, int) is unsupported"); + } + + @Override + public boolean execute(String sql, int[] columnIndexes) throws SQLException { + throw new UnsupportedOperationException("execute(String, int[]) is unsupported"); + } + + @Override + public boolean execute(String sql, String[] columnNames) throws SQLException { + throw new UnsupportedOperationException("execute(String, String[]) is unsupported"); + } + + @Override + public int getResultSetHoldability() throws SQLException { + throw new UnsupportedOperationException("getResultSetHoldability is unsupported"); + } + + @Override + public void setPoolable(boolean poolable) throws SQLException { + throw new UnsupportedOperationException("setPoolable(boolean) is unsupported"); + } + + @Override + public boolean 
isPoolable() throws SQLException { + throw new UnsupportedOperationException("isPoolable is unsupported"); + } + + @Override + public void closeOnCompletion() throws SQLException { + throw new UnsupportedOperationException("closeOnCompletion is unsupported"); + } + + @Override + public boolean isCloseOnCompletion() throws SQLException { + throw new UnsupportedOperationException("isCloseOnCompletion is unsupported"); + } + + @Override + public ResultSet executeQuery(String sql) throws SQLException { + throw new UnsupportedOperationException("executeQuery(String) is unsupported"); + } + + @Override + public int executeUpdate(String sql) throws SQLException { + throw new UnsupportedOperationException("executeUpdate(String) is unsupported"); + } + + @Override + public int getMaxFieldSize() throws SQLException { + throw new UnsupportedOperationException("getMaxFieldSize is unsupported"); + } + + @Override + public void setMaxFieldSize(int max) throws SQLException { + throw new UnsupportedOperationException("setMaxFieldSize(int) is unsupported"); + } + + @Override + public int getMaxRows() throws SQLException { + throw new UnsupportedOperationException("getMaxRows is unsupported"); + } + + @Override + public void setMaxRows(int max) throws SQLException { + throw new UnsupportedOperationException("setMaxRows(int) is unsupported"); + } + + @Override + public void setEscapeProcessing(boolean enable) throws SQLException { + throw new UnsupportedOperationException("setEscapeProcessing(boolean) is unsupported"); + } + + @Override + public int getQueryTimeout() throws SQLException { + throw new UnsupportedOperationException("getQueryTimeout is unsupported"); + } + + @Override + public void setQueryTimeout(int seconds) throws SQLException { + throw new UnsupportedOperationException("setQueryTimeout(int) is unsupported"); + } + + @Override + public void cancel() throws SQLException { + throw new UnsupportedOperationException("cancel is unsupported"); + } + + @Override + public 
SQLWarning getWarnings() throws SQLException { + throw new UnsupportedOperationException("getWarnings is unsupported"); + } + + @Override + public void clearWarnings() throws SQLException { + throw new UnsupportedOperationException("clearWarnings is unsupported"); + } + + @Override + public void setCursorName(String name) throws SQLException { + throw new UnsupportedOperationException("setCursorName(String) is unsupported"); + } + + @Override + public boolean execute(String sql) throws SQLException { + throw new UnsupportedOperationException("execute(String) is unsupported"); + } + + @Override + public ResultSet getResultSet() throws SQLException { + throw new UnsupportedOperationException("getResultSet is unsupported"); + } + + @Override + public int getUpdateCount() throws SQLException { + throw new UnsupportedOperationException("getUpdateCount is unsupported"); + } + + @Override + public boolean getMoreResults() throws SQLException { + throw new UnsupportedOperationException("getMoreResults is unsupported"); + } + + @Override + public void setFetchDirection(int direction) throws SQLException { + throw new UnsupportedOperationException("setFetchDirection(int) is unsupported"); + } + + @Override + public int getFetchDirection() throws SQLException { + throw new UnsupportedOperationException("getFetchDirection is unsupported"); + } + + @Override + public void setFetchSize(int rows) throws SQLException { + throw new UnsupportedOperationException("setFetchSize(int) is unsupported"); + } + + @Override + public int getFetchSize() throws SQLException { + throw new UnsupportedOperationException("getFetchSize is unsupported"); + } + + @Override + public int getResultSetConcurrency() throws SQLException { + throw new UnsupportedOperationException("getResultSetConcurrency is unsupported"); + } + + @Override + public int getResultSetType() throws SQLException { + throw new UnsupportedOperationException("getResultSetType is unsupported"); + } + + @Override + public void 
addBatch(String sql) throws SQLException { + throw new UnsupportedOperationException("addBatch(String) is unsupported"); + } + + @Override + public ResultSet executeQuery() throws SQLException { + throw new UnsupportedOperationException("executeQuery is unsupported"); + } + + @Override + public int executeUpdate() throws SQLException { + throw new UnsupportedOperationException("executeUpdate is unsupported"); + } + + @Override + public boolean execute() throws SQLException { + throw new UnsupportedOperationException("execute is unsupported"); + } + + @Override + public ParameterMetaData getParameterMetaData() throws SQLException { + throw new UnsupportedOperationException("getParameterMetaData is unsupported"); + } + + @Override + public ResultSetMetaData getMetaData() throws SQLException { + throw new UnsupportedOperationException("getMetaData is unsupported"); + } + + @Override + public <T> T unwrap(Class<T> iface) throws SQLException { + throw new UnsupportedOperationException("unwrap(Class) is unsupported"); + } + + @Override + public boolean isWrapperFor(Class<?> iface) throws SQLException { + throw new UnsupportedOperationException("isWrapperFor(Class) is unsupported"); + } +} diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectLoaderBuilder.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectLoaderBuilder.java new file mode 100644 index 0000000000..22664947b0 --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectLoaderBuilder.java @@ -0,0 +1,170 @@ +/* + * Copyright 2024 OceanBase. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath; + +import java.io.Serializable; + +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadConnection; +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadManager; +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadStatement; +import com.alipay.oceanbase.rpc.direct_load.exception.ObDirectLoadException; +import com.alipay.oceanbase.rpc.exception.ObTableException; +import com.alipay.oceanbase.rpc.protocol.payload.impl.ObLoadDupActionType; +import org.apache.commons.lang.ObjectUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The builder for {@link ObTableDirectLoad}. + */ +public class DirectLoaderBuilder implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(DirectLoaderBuilder.class); + private String host; + private int port; + + private String user; + private String tenant; + private String password; + + private String schema; + private String table; + + /** + * Server-side parallelism. 
+ */ + private int parallel; + + private long maxErrorCount; + + private ObLoadDupActionType duplicateKeyAction; + + /** + * The overall timeout of the direct load task + */ + private Long timeout; + + private Long heartBeatTimeout; + + private Long heartBeatInterval; + + public DirectLoaderBuilder host(String host) { + this.host = host; + return this; + } + + public DirectLoaderBuilder port(int port) { + this.port = port; + return this; + } + + public DirectLoaderBuilder user(String user) { + //1.4.0的obkv版本只需要用户名称,不能带租户和集群信息 + int indexOf = user.indexOf("@"); + this.user = user; + if (indexOf > 0) { + this.user = user.substring(0, indexOf); + } + return this; + } + + public DirectLoaderBuilder tenant(String tenant) { + this.tenant = tenant; + return this; + } + + public DirectLoaderBuilder password(String password) { + this.password = password; + return this; + } + + public DirectLoaderBuilder schema(String schema) { + this.schema = schema; + return this; + } + + public DirectLoaderBuilder table(String table) { + this.table = table; + return this; + } + + public DirectLoaderBuilder parallel(int parallel) { + this.parallel = parallel; + return this; + } + + public DirectLoaderBuilder maxErrorCount(long maxErrorCount) { + this.maxErrorCount = maxErrorCount; + return this; + } + + public DirectLoaderBuilder duplicateKeyAction(ObLoadDupActionType duplicateKeyAction) { + this.duplicateKeyAction = duplicateKeyAction; + return this; + } + + public DirectLoaderBuilder timeout(long timeout) { + this.timeout = timeout; + return this; + } + + public DirectLoaderBuilder heartBeatTimeout(Long heartBeatTimeout) { + this.heartBeatTimeout = heartBeatTimeout; + return this; + } + + public DirectLoaderBuilder heartBeatInterval(Long heartBeatInterval) { + this.heartBeatInterval = heartBeatInterval; + return this; + } + + public ObTableDirectLoad build() { + try { + ObDirectLoadConnection obDirectLoadConnection = buildConnection(parallel); + ObDirectLoadStatement 
obDirectLoadStatement = buildStatement(obDirectLoadConnection); + return new ObTableDirectLoad(schema, table, obDirectLoadStatement, obDirectLoadConnection); + } catch (ObDirectLoadException e) { + throw new ObTableException(e.getMessage(), e); + } + } + + private ObDirectLoadConnection buildConnection(int writeThreadNum) throws ObDirectLoadException { + if (heartBeatTimeout == null || heartBeatInterval == null) { + throw new IllegalArgumentException("heartBeatTimeout and heartBeatInterval must not be null"); + } + ObDirectLoadConnection build = ObDirectLoadManager.getConnectionBuilder() + .setServerInfo(host, port) + .setLoginInfo(tenant, user, password, schema) + .setHeartBeatInfo(heartBeatTimeout, heartBeatInterval) + .enableParallelWrite(writeThreadNum) + .build(); + log.info("ObDirectLoadConnection value is:{}", ObjectUtils.toString(build)); + return build; + } + + private ObDirectLoadStatement buildStatement(ObDirectLoadConnection connection) throws ObDirectLoadException { + ObDirectLoadStatement build = connection.getStatementBuilder() + .setTableName(table) + .setParallel(parallel) + .setQueryTimeout(timeout) + .setDupAction(duplicateKeyAction) + .setMaxErrorRowCount(maxErrorCount) + .build(); + log.info("ObDirectLoadStatement value is:{}", ObjectUtils.toString(build)); + return build; + } +} diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectPathConnection.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectPathConnection.java new file mode 100644 index 0000000000..ce7ef7e3c2 --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectPathConnection.java @@ -0,0 +1,398 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath; + +import java.sql.SQLException; +import java.util.Arrays; + +import com.alibaba.datax.common.util.Configuration; + +import 
com.alipay.oceanbase.rpc.direct_load.ObDirectLoadBucket; +import com.alipay.oceanbase.rpc.protocol.payload.impl.ObLoadDupActionType; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static com.google.common.base.Preconditions.checkArgument; + +public class DirectPathConnection extends AbstractRestrictedConnection { + + private static final int OB_DIRECT_PATH_DEFAULT_BLOCKS = 1; + private static final long OB_DIRECT_PATH_HEART_BEAT_TIMEOUT = 60000; + private static final long OB_DIRECT_PATH_HEART_BEAT_INTERVAL = 10000; + private static final int DEFAULT_BUFFERSIZE = 1048576; + private final Configuration configuration; + + private State state; + private int commiters; + + private final int blocks; + private final ObTableDirectLoad load; + private final Object lock = new Object(); + + private static final Logger log = LoggerFactory.getLogger(DirectPathConnection.class); + + /** + * Construct a new instance. + * + * @param load + * @param blocks + */ + private DirectPathConnection(ObTableDirectLoad load, int blocks, Configuration configuration) { + this.configuration = configuration; + this.load = load; + this.blocks = blocks; + } + + /** + * Begin a new {@link DirectPathConnection } + * + * @return DirectPathConnection + * @throws SQLException + */ + public DirectPathConnection begin() throws SQLException { + synchronized (lock) { + if (state == null || state == State.CLOSED) { + try { + this.load.begin(); + this.state = State.BEGIN; + } catch (Exception ex) { + throw new SQLException(ex); + } + } else { + throw new IllegalStateException("Begin transaction failed as connection state is already BEGIN"); + } + } + return this; + } + + /** + * Commit buffered data with MAXIMUM timeout. 
+ * + * @throws SQLException + */ + @Override + public void commit() throws SQLException { + synchronized (lock) { + if (state == State.BEGIN) { + this.commiters++; + if (commiters == blocks) { + try { + this.load.commit(); + state = State.FINISHED; + } catch (Exception ex) { + throw new SQLException(ex); + } + } else if (commiters > blocks) { + throw new IllegalStateException("Your commit have exceed the limit. (" + commiters + ">" + blocks + ")"); + } + } else { + throw new IllegalStateException("Commit transaction failed as connection state is not BEGIN"); + } + } + } + + /** + * Rollback if error occurred. + * + * @throws SQLException + */ + @Override + public void rollback() throws SQLException { + synchronized (lock) { + if (state == State.BEGIN) { + try { + //obkv-table-client-2.1.0的close方法包含回滚逻辑 + this.load.close(); + } catch (Exception ex) { + throw new SQLException(ex); + } + } else { + throw new IllegalStateException("Rollback transaction failed as connection state is not BEGIN"); + } + } + } + + /** + * Close this connection. + */ + @Override + public void close() { + synchronized (lock) { + // Closed only if state is BEGIN + this.load.close(); + this.state = State.CLOSED; + } + } + + /** + * @return DirectPathPreparedStatement + */ + @Override + public DirectPathPreparedStatement createStatement() throws SQLException { + return this.prepareStatement(null); + } + + /** + * A new batch need create a new {@link DirectPathPreparedStatement }. + * The {@link DirectPathPreparedStatement } can not be reuse, otherwise it may cause duplicate records. 
+ * + * @return DirectPathStatement + */ + @Override + public DirectPathPreparedStatement prepareStatement(String sql) throws SQLException { + if (state == State.BEGIN) { + Integer bufferSize = configuration.getInt(DirectPathConstants.BUFFERSIZE, DEFAULT_BUFFERSIZE); + log.info("The current bufferSize size is{}", bufferSize); + return new DirectPathPreparedStatement(this, bufferSize); + } else { + throw new IllegalStateException("Create statement failed as connection state is not BEGIN"); + } + } + + /** + * Return the schema name of this connection instance. + * + * @return String + */ + @Override + public String getSchema() { + if (state == State.BEGIN) { + return this.load.getTable().getDatabase(); + } else { + throw new IllegalStateException("Get schema failed as connection state is not BEGIN"); + } + } + + /** + * Return the table name of this connection instance. + * + * @return String + */ + public String getTableName() { + if (state == State.BEGIN) { + return this.load.getTableName(); + } else { + throw new IllegalStateException("Get table failed as connection state is not BEGIN"); + } + } + + /** + * Return whether this connection is closed. + * + * @return boolean + */ + @Override + public boolean isClosed() { + synchronized (lock) { + return this.state == State.CLOSED; + } + } + + public boolean isFinished() { + return this.state.equals(State.FINISHED); + } + + /** + * Insert bucket data into buffer. + * + * @param bucket + * @return int[] + * @throws SQLException + */ + int[] insert(ObDirectLoadBucket bucket) throws SQLException { + try { + this.load.write(bucket); + int[] result = new int[bucket.getRowNum()]; + Arrays.fill(result, 1); + return result; + } catch (Exception ex) { + throw new SQLException(ex); + } + } + + /** + * Indicates the state of {@link DirectPathConnection } + */ + enum State { + + /** + * Begin transaction + */ + BEGIN, + /** + * Transaction is finished, ready to close. + */ + FINISHED, + + /** + * Transaction is closed. 
+ */ + CLOSED; + } + + /** + * This builder used to build a new {@link DirectPathConnection } + */ + public static class Builder { + + private String host; + private int port; + + private String user; + private String tenant; + private String password; + + private String schema; + private String table; + + /** + * Client job count. + */ + private int blocks = OB_DIRECT_PATH_DEFAULT_BLOCKS; + + /** + * Server threads used to sort. + */ + private int parallel; + + private long maxErrorCount; + + private ObLoadDupActionType duplicateKeyAction; + + // Used for load data + private long serverTimeout; + + private Configuration configuration; + + public Builder host(String host) { + this.host = host; + return this; + } + + public Builder port(int port) { + this.port = port; + return this; + } + + public Builder user(String user) { + this.user = user; + return this; + } + + public Builder tenant(String tenant) { + this.tenant = tenant; + return this; + } + + public Builder password(String password) { + this.password = password; + return this; + } + + public Builder schema(String schema) { + this.schema = schema; + return this; + } + + public Builder table(String table) { + this.table = table; + return this; + } + + public Builder blocks(int blocks) { + this.blocks = blocks; + return this; + } + + public Builder parallel(int parallel) { + this.parallel = parallel; + return this; + } + + public Builder maxErrorCount(long maxErrorCount) { + this.maxErrorCount = maxErrorCount; + return this; + } + + public Builder duplicateKeyAction(ObLoadDupActionType duplicateKeyAction) { + this.duplicateKeyAction = duplicateKeyAction; + return this; + } + + public Builder serverTimeout(long serverTimeout) { + this.serverTimeout = serverTimeout; + return this; + } + + public Builder configuration(Configuration configuration) { + this.configuration = configuration; + return this; + } + + /** + * Build a new {@link DirectPathConnection } + * + * @return DirectPathConnection + */ + public 
DirectPathConnection build() throws Exception { + return createConnection(host, port, user, tenant, password, schema, table, // + blocks, parallel, maxErrorCount, duplicateKeyAction, serverTimeout, duplicateKeyAction).begin(); + } + + /** + * Create a new {@link DirectPathConnection } + * + * @param host + * @param port + * @param user + * @param tenant + * @param password + * @param schema + * @param table + * @param parallel + * @param maxErrorCount + * @param action + * @param serverTimeout + * @return DirectPathConnection + * @throws Exception + */ + DirectPathConnection createConnection(String host, int port, String user, String tenant, String password, String schema, String table, // + int blocks, int parallel, long maxErrorCount, ObLoadDupActionType action, long serverTimeout, ObLoadDupActionType obLoadDupActionType) throws Exception { + + checkArgument(StringUtils.isNotBlank(host), "Host is null.(host=%s)", host); + checkArgument((port > 0 && port < 65535), "Port is invalid.(port=%s)", port); + checkArgument(StringUtils.isNotBlank(user), "User Name is null.(user=%s)", user); + checkArgument(StringUtils.isNotBlank(tenant), "Tenant Name is null.(tenant=%s)", tenant); + checkArgument(StringUtils.isNotBlank(schema), "Schema Name is null.(schema=%s)", schema); + checkArgument(StringUtils.isNotBlank(table), "Table Name is null.(table=%s)", table); + + checkArgument(blocks > 0, "Client Blocks is invalid.(blocks=%s)", blocks); + checkArgument(parallel > 0, "Server Parallel is invalid.(parallel=%s)", parallel); + checkArgument(maxErrorCount > -1, "MaxErrorCount is invalid.(maxErrorCount=%s)", maxErrorCount); + checkArgument(action != null, "ObLoadDupActionType is null.(obLoadDupActionType=%s)", action); + checkArgument(serverTimeout > 0, "Server timeout is invalid.(timeout=%s)", serverTimeout); + Long heartBeatTimeout = 0L; + Long heartBeatInterval = 0L; + if (configuration != null) { + heartBeatTimeout = configuration.getLong(DirectPathConstants.HEART_BEAT_TIMEOUT, 
OB_DIRECT_PATH_HEART_BEAT_TIMEOUT); + heartBeatInterval = configuration.getLong(DirectPathConstants.HEART_BEAT_INTERVAL, OB_DIRECT_PATH_HEART_BEAT_INTERVAL); + parallel = configuration.getInt(DirectPathConstants.PARALLEL, parallel); + } + DirectLoaderBuilder builder = new DirectLoaderBuilder() + .host(host).port(port) + .user(user) + .tenant(tenant) + .password(password) + .schema(schema) + .table(table) + .parallel(parallel) + .maxErrorCount(maxErrorCount) + .timeout(serverTimeout) + .duplicateKeyAction(obLoadDupActionType) + .heartBeatTimeout(heartBeatTimeout) + .heartBeatInterval(heartBeatInterval); + ObTableDirectLoad directLoad = builder.build(); + + return new DirectPathConnection(directLoad, blocks, configuration); + } + } +} diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectPathConstants.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectPathConstants.java new file mode 100644 index 0000000000..d32c966c59 --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectPathConstants.java @@ -0,0 +1,12 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath; + +public class DirectPathConstants { + // 以下常量已在DirectPathConnection中被正确使用 + public static final String HEART_BEAT_TIMEOUT = "heartBeatTimeout"; + + public static final String HEART_BEAT_INTERVAL = "heartBeatInterval"; + + public static final String PARALLEL = "parallel"; + + public static final String BUFFERSIZE = "bufferSize"; +} diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectPathPreparedStatement.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectPathPreparedStatement.java new file mode 100644 index 0000000000..8f4aa7cf4c --- /dev/null +++ 
b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/DirectPathPreparedStatement.java @@ -0,0 +1,164 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath; + +import java.sql.SQLException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.OptionalInt; + +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadBucket; +import com.alipay.oceanbase.rpc.direct_load.exception.ObDirectLoadException; +import com.alipay.oceanbase.rpc.protocol.payload.impl.ObObj; + +import static com.google.common.base.Preconditions.checkArgument; + +public class DirectPathPreparedStatement extends AbstractRestrictedPreparedStatement { + + private ObDirectLoadBucket bucket; + private final DirectPathConnection conn; + private final Map<Integer, ObObj> parameters; + private final Integer bufferSize; + private static final int DEFAULT_BUFFERSIZE = 1048576; + public static final int[] EMPTY_ARRAY = new int[0]; + + /** + * Construct a new {@link DirectPathConnection } instance. + * + * @param conn + */ + public DirectPathPreparedStatement(DirectPathConnection conn) { + this.conn = conn; + this.parameters = new HashMap<>(); + this.bufferSize = DEFAULT_BUFFERSIZE; + this.bucket = new ObDirectLoadBucket(); + } + + public DirectPathPreparedStatement(DirectPathConnection conn, Integer bufferSize) { + this.conn = conn; + this.parameters = new HashMap<>(); + this.bufferSize = bufferSize; + this.bucket = new ObDirectLoadBucket(bufferSize); + } + + /** + * Return current direct path connection. + * + * @return DirectPathConnection + * @throws SQLException + */ + @Override + public DirectPathConnection getConnection() throws SQLException { + return this.conn; + } + + /** + * Copy a new row data avoid overwrite.
+ * + * @throws SQLException + */ + @Override + public void addBatch() throws SQLException { + checkRange(); + ObObj[] objObjArray = new ObObj[parameters.size()]; + for (Map.Entry<Integer, ObObj> entry : parameters.entrySet()) { + objObjArray[entry.getKey() - 1] = entry.getValue(); + } + this.addBatch(objObjArray); + } + + /** + * Add a new row into buffer with input original value list. + * + * @param values One original row data. + */ + public void addBatch(List values) { + this.addBatch(createObObjArray(values)); + } + + /** + * Add a new row into buffer with input original value array. + * + * @param values One original row data. + */ + public void addBatch(Object[] values) { + this.addBatch(createObObjArray(values)); + } + + /** + * Add a new row into buffer with input ObObj array. + * + * @param arr One row data described as ObObj. + */ + private void addBatch(ObObj[] arr) { + checkArgument(arr != null && arr.length > 0, "Input values is null"); + try { + this.bucket.addRow(arr); + } catch (ObDirectLoadException e) { + throw new RuntimeException(e); + } + } + + /** + * Buffered the row data in memory. (defined in the bucket) + * You must invoke {@code ObDirectLoadBucket.clearBatch } after executeBatch. + * + * @return int[] + * @throws SQLException + */ + @Override + public int[] executeBatch() throws SQLException { + return this.bucket.isEmpty() ? EMPTY_ARRAY : this.conn.insert(bucket); + } + + /** + * Clear batch is always recreate a new {@link ObDirectLoadBucket} + */ + @Override + public void clearBatch() { + this.parameters.clear(); + this.bucket = new ObDirectLoadBucket(bufferSize); + } + + /** + * Clear the holder parameters. + * + * @throws SQLException + */ + @Override + public void clearParameters() throws SQLException { + this.parameters.clear(); + } + + /** + * @return boolean + */ + @Override + public boolean isOracleMode() { + return false; + } + + /** + * Set parameter to the target position.
+ * + * @param parameterIndex Start From 1 + * @param obObj Convert original value to {@link ObObj } + * @throws SQLException + */ + @Override + protected void setParameter(int parameterIndex, ObObj obObj) throws SQLException { + checkArgument(parameterIndex > 0, "Parameter index should start from 1"); + this.parameters.put(parameterIndex, obObj); + } + + /** + * Avoid range exception: + * + * Map.put(1, "abc"); + * Map.put(5, "def"); // Error: parameter index is 5, but 2 values exists. + */ + private void checkRange() { + OptionalInt optionalInt = parameters.keySet().stream().mapToInt(e -> e).max(); + int parameterIndex = optionalInt.orElseThrow(() -> new IllegalArgumentException("No parameter index found")); + checkArgument(parameterIndex == parameters.size(), "Parameter index(%s) is unmatched with value list(%s)", parameterIndex, parameters.size()); + } +} \ No newline at end of file diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/ObTableDirectLoad.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/ObTableDirectLoad.java new file mode 100644 index 0000000000..859fcedd16 --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/ObTableDirectLoad.java @@ -0,0 +1,154 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath; + +import java.sql.SQLException; +import java.util.Objects; + +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadBucket; +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadConnection; +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadStatement; +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadTraceId; +import com.alipay.oceanbase.rpc.direct_load.exception.ObDirectLoadException; +import com.alipay.oceanbase.rpc.direct_load.protocol.payload.ObTableLoadClientStatus; +import com.alipay.oceanbase.rpc.table.ObTable; +import 
org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Wrapper of the direct-load API for OceanBase. + */ +public class ObTableDirectLoad implements AutoCloseable { + private static final Logger LOG = LoggerFactory.getLogger(ObTableDirectLoad.class); + + private final String tableName; + private final String schemaTableName; + private final ObDirectLoadStatement statement; + private final ObDirectLoadConnection connection; + + public ObTableDirectLoad(String schemaName, String tableName, ObDirectLoadStatement statement, ObDirectLoadConnection connection) { + Objects.requireNonNull(schemaName, "schemaName must not be null"); + Objects.requireNonNull(tableName, "tableName must not be null"); + Objects.requireNonNull(statement, "statement must not be null"); + Objects.requireNonNull(connection, "connection must not be null"); + this.tableName = tableName; + this.schemaTableName = String.format("%s.%s", schemaName, tableName); + this.statement = statement; + this.connection = connection; + } + + /** + * Begin the direct load operation. + * + * @throws ObDirectLoadException if an error occurs during the operation. + */ + public void begin() throws ObDirectLoadException { + statement.begin(); + } + + /** + * Write data into the direct load operation. + * + * @param bucket The data bucket to write. + * @throws SQLException if writing fails. 
+ */ + public void write(ObDirectLoadBucket bucket) throws SQLException { + try { + if (bucket == null || bucket.isEmpty()) { + throw new IllegalArgumentException("Bucket must not be null or empty."); + } + LOG.info("Writing {} rows to table: {}", bucket.getRowNum(), schemaTableName); + statement.write(bucket); + LOG.info("Successfully wrote bucket data to table: {}", schemaTableName); + } catch (ObDirectLoadException e) { + LOG.error("Failed to write to table: {}", schemaTableName, e); + throw new SQLException(String.format("Failed to write to table: %s", schemaTableName), e); + } + } + + /** + * Commit the current direct load operation. + * + * @throws SQLException if commit fails. + */ + public void commit() throws SQLException { + try { + LOG.info("Committing direct load for table: {}", schemaTableName); + statement.commit(); + LOG.info("Successfully committed direct load for table: {}", schemaTableName); + } catch (ObDirectLoadException e) { + LOG.error("Failed to commit for table: {}", schemaTableName, e); + throw new SQLException(String.format("Failed to commit for table: %s", schemaTableName), e); + } + } + + /** + * Close the direct load operation. 
+ */ + public void close() { + LOG.info("Closing direct load for table: {}", schemaTableName); + statement.close(); + connection.close(); + LOG.info("Direct load closed for table: {}", schemaTableName); + } + + /** + * Gets the status from the current connection based on the traceId + */ + public ObTableLoadClientStatus getStatus() throws SQLException { + ObDirectLoadTraceId traceId = statement.getTraceId(); + // Check if traceId is null and throw an exception with a clear message + if (traceId == null) { + throw new SQLException("traceId is null."); + } + // Retrieve the status using the traceId + ObTableLoadClientStatus status = statement.getConnection().getProtocol().getHeartBeatRpc(traceId).getStatus(); + if (status == null) { + LOG.info("Direct load connect protocol heartBeatRpc for table is null: {}", schemaTableName); + throw new SQLException("status is null."); + } + // Return status if not null; otherwise, return ERROR + return status; + } + + /** + * Gets the current table + */ + public ObTable getTable() { + try { + return this.statement.getObTablePool().getControlObTable(); + } catch (ObDirectLoadException e) { + throw new RuntimeException(e); + } + } + + public String getTableName() { + if (StringUtils.isBlank(tableName)) { + throw new IllegalArgumentException("tableName is blank."); + } + return tableName; + } + + /** + * Inserts data into the direct load operation. + * + * @param bucket The data bucket containing rows to insert. + * @throws SQLException if an error occurs during the insert operation. 
+ */ + public void insert(ObDirectLoadBucket bucket) throws SQLException { + LOG.info("Inserting {} rows to table: {}", bucket.getRowNum(), schemaTableName); + + if (bucket.isEmpty()) { + LOG.warn("Parameter 'bucket' must not be empty."); + throw new IllegalArgumentException("Parameter 'bucket' must not be empty."); + } + + try { + // Perform the insertion into the load operation + statement.write(bucket); + LOG.info("Successfully inserted data into table: {}", schemaTableName); + } catch (Exception ex) { + LOG.error("Unexpected error during insert operation for table: {}", schemaTableName, ex); + throw new SQLException("Unexpected error during insert operation.", ex); + } + } +} diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java index c8630cd0af..6e4d4aab51 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java @@ -14,6 +14,17 @@ public abstract class AbstractConnHolder { protected final Configuration config; protected Connection conn; + protected String jdbcUrl; + protected String userName; + protected String password; + + protected AbstractConnHolder(Configuration config, String jdbcUrl, String userName, String password) { + this.config = config; + this.jdbcUrl = jdbcUrl; + this.userName = userName; + this.password = password; + } + public AbstractConnHolder(Configuration config) { this.config = config; } @@ -45,4 +56,6 @@ public Connection reconnect() { public abstract String getUserName(); public abstract void destroy(); + + public abstract void doCommit(); } diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathAbstractConnHolder.java 
b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathAbstractConnHolder.java new file mode 100644 index 0000000000..c5c6dbe0ca --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathAbstractConnHolder.java @@ -0,0 +1,61 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.ext; + +import java.sql.Connection; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class DirectPathAbstractConnHolder { + private static final Logger LOG = LoggerFactory.getLogger(AbstractConnHolder.class); + protected Configuration config; + protected String jdbcUrl; + protected String userName; + protected String password; + + protected Connection conn; + + protected DirectPathAbstractConnHolder(Configuration config, String jdbcUrl, String userName, String password) { + this.config = config; + this.jdbcUrl = jdbcUrl; + this.userName = userName; + this.password = password; + } + + public Connection reconnect() { + DBUtil.closeDBResources(null, conn); + return initConnection(); + } + + public Connection getConn() { + if (conn == null) { + return initConnection(); + } else { + try { + if (conn.isClosed()) { + return reconnect(); + } + return conn; + } catch (Exception e) { + LOG.debug("can not judge whether the hold connection is closed or not, just reuse the hold connection"); + return conn; + } + } + } + + public String getJdbcUrl() { + return jdbcUrl; + } + + public Configuration getConfig() { + return config; + } + + public void doCommit() {} + + public abstract void destroy(); + + public abstract Connection initConnection(); +} diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathConnHolder.java 
b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathConnHolder.java new file mode 100644 index 0000000000..352eda1ca4 --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathConnHolder.java @@ -0,0 +1,115 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.ext; + +import java.sql.Connection; +import java.sql.SQLException; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.Config; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.common.Table; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath.DirectPathConnection; + +import com.alipay.oceanbase.rpc.protocol.payload.impl.ObLoadDupActionType; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DirectPathConnHolder extends AbstractConnHolder { + private static final Logger LOG = LoggerFactory.getLogger(DirectPathConnHolder.class); + + /** + * The server side timeout. 
+ */ + private static final long SERVER_TIMEOUT = 24L * 60 * 60 * 1000 * 1000; + + private static final ConcurrentHashMap cache = new ConcurrentHashMap<>(); + + private String tableName; + private String host; + private int rpcPort; + private String tenantName; + private String databaseName; + private int blocks; + private int threads; + private int maxErrors; + private ObLoadDupActionType duplicateKeyAction; + + public DirectPathConnHolder(Configuration config, ServerConnectInfo connectInfo, String tableName, int threadsPerChannel) { + super(config, connectInfo.jdbcUrl, connectInfo.userName, connectInfo.password); + // direct path: + //● publicCloud & odp - single or full + //● publicCloud & observer - not support + //● !publicCloud & odp - full + //● !publicCloud & observer - single + this.userName = connectInfo.getFullUserName(); + this.host = connectInfo.host; + this.rpcPort = connectInfo.rpcPort; + this.tenantName = connectInfo.tenantName; + if (!connectInfo.publicCloud && StringUtils.isEmpty(tenantName)) { + throw new IllegalStateException("tenant name is needed when using direct path load in private cloud."); + } + this.databaseName = connectInfo.databaseName; + this.tableName = tableName; + this.blocks = config.getInt(Config.BLOCKS_COUNT, 1); + this.threads = threadsPerChannel * Math.min(blocks, 32); + this.maxErrors = config.getInt(Config.MAX_ERRORS, 0); + this.duplicateKeyAction = "insert".equalsIgnoreCase(config.getString(Config.OB_WRITE_MODE)) ? 
ObLoadDupActionType.IGNORE : ObLoadDupActionType.REPLACE; + } + + @Override + public Connection initConnection() { + synchronized (cache) { + conn = cache.computeIfAbsent(new Table(databaseName, tableName), e -> { + try { + return new DirectPathConnection.Builder().host(host) // + .port(rpcPort) // + .tenant(tenantName) // + .user(userName) // + .password(Optional.ofNullable(password).orElse("")) // + .schema(databaseName) // + .table(tableName) // + .blocks(blocks) // + .parallel(threads) // + .maxErrorCount(maxErrors) // + .duplicateKeyAction(duplicateKeyAction) // + .serverTimeout(SERVER_TIMEOUT) // + .configuration(config) + .build(); + } catch (Exception ex) { + throw DataXException.asDataXException(DBUtilErrorCode.CONN_DB_ERROR, ex); + } + }); + } + return conn; + } + + public String getJdbcUrl() { + return ""; + } + + public String getUserName() { + return ""; + } + + @Override + public void destroy() { + if (conn != null && ((DirectPathConnection) conn).isFinished()) { + DBUtil.closeDBResources(null, conn); + } + } + + @Override + public void doCommit() { + try { + if (conn != null) { + conn.commit(); + } + } catch (SQLException e) { + throw new RuntimeException(e); + } + } +} diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/OCJConnHolder.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/OCJConnHolder.java index 262fb1cb7b..02277fbe8c 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/OCJConnHolder.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/OCJConnHolder.java @@ -1,42 +1,54 @@ package com.alibaba.datax.plugin.writer.oceanbasev10writer.ext; import java.sql.Connection; +import java.sql.SQLException; import com.alibaba.datax.common.util.Configuration; /** * wrap oceanbase java client + * * @author oceanbase */ public class OCJConnHolder extends 
AbstractConnHolder { - private ServerConnectInfo connectInfo; - private String dataSourceKey; - - public OCJConnHolder (Configuration config, ServerConnectInfo connInfo) { - super(config); - this.connectInfo = connInfo; - this.dataSourceKey = OBDataSourceV10.genKey(connectInfo.getFullUserName(), connectInfo.databaseName); - OBDataSourceV10.init(config, connectInfo.getFullUserName(), connectInfo.password, connectInfo.databaseName); - } - - @Override - public Connection initConnection() { - conn = OBDataSourceV10.getConnection(dataSourceKey); - return conn; - } - - @Override - public String getJdbcUrl() { - return connectInfo.jdbcUrl; - } - - @Override - public String getUserName() { - return connectInfo.userName; - } - - public void destroy() { - OBDataSourceV10.destory(this.dataSourceKey); - } + private ServerConnectInfo connectInfo; + private String dataSourceKey; + + public OCJConnHolder(Configuration config, ServerConnectInfo connInfo) { + super(config); + this.connectInfo = connInfo; + this.dataSourceKey = OBDataSourceV10.genKey(connectInfo.getFullUserName(), connectInfo.databaseName); + OBDataSourceV10.init(config, connectInfo.getFullUserName(), connectInfo.password, connectInfo.databaseName); + } + + @Override + public Connection initConnection() { + conn = OBDataSourceV10.getConnection(dataSourceKey); + return conn; + } + + @Override + public String getJdbcUrl() { + return connectInfo.jdbcUrl; + } + + @Override + public String getUserName() { + return connectInfo.userName; + } + + public void destroy() { + OBDataSourceV10.destory(this.dataSourceKey); + } + + public void doCommit() { + try { + if (conn != null) { + conn.commit(); + } + } catch (SQLException e) { + throw new RuntimeException(e); + } + } } diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ObClientConnHolder.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ObClientConnHolder.java index 
ac75d359dc..c0e885c250 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ObClientConnHolder.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ObClientConnHolder.java @@ -1,6 +1,7 @@ package com.alibaba.datax.plugin.writer.oceanbasev10writer.ext; import java.sql.Connection; +import java.sql.SQLException; import java.util.ArrayList; import java.util.List; @@ -14,50 +15,60 @@ * 数据库连接代理对象,负责创建连接,重新连接 * * @author oceanbase - * */ public class ObClientConnHolder extends AbstractConnHolder { - private final String jdbcUrl; - private final String userName; - private final String password; - - public ObClientConnHolder(Configuration config, String jdbcUrl, String userName, String password) { - super(config); - this.jdbcUrl = jdbcUrl; - this.userName = userName; - this.password = password; - } - - // Connect to ob with obclient and obproxy - @Override - public Connection initConnection() { - String BASIC_MESSAGE = String.format("jdbcUrl:[%s]", this.jdbcUrl); - DataBaseType dbType = DataBaseType.OceanBase; - if (ObWriterUtils.isOracleMode()) { - // set up for writing timestamp columns - List sessionConfig = config.getList(Key.SESSION, new ArrayList(), String.class); - sessionConfig.add("ALTER SESSION SET NLS_DATE_FORMAT='YYYY-MM-DD HH24:MI:SS'"); - sessionConfig.add("ALTER SESSION SET NLS_TIMESTAMP_FORMAT='YYYY-MM-DD HH24:MI:SS.FF'"); - sessionConfig.add("ALTER SESSION SET NLS_TIMESTAMP_TZ_FORMAT='YYYY-MM-DD HH24:MI:SS.FF TZR TZD'"); - config.set(Key.SESSION, sessionConfig); - } - conn = DBUtil.getConnection(dbType, jdbcUrl, userName, password); - DBUtil.dealWithSessionConfig(conn, config, dbType, BASIC_MESSAGE); - return conn; - } - - @Override - public String getJdbcUrl() { - return jdbcUrl; - } - - @Override - public String getUserName() { - return userName; - } - - @Override - public void destroy() { - DBUtil.closeDBResources(null, conn); - } + private final String jdbcUrl; 
+ private final String userName; + private final String password; + + public ObClientConnHolder(Configuration config, String jdbcUrl, String userName, String password) { + super(config); + this.jdbcUrl = jdbcUrl; + this.userName = userName; + this.password = password; + } + + // Connect to ob with obclient and obproxy + @Override + public Connection initConnection() { + String BASIC_MESSAGE = String.format("jdbcUrl:[%s]", this.jdbcUrl); + DataBaseType dbType = DataBaseType.OceanBase; + if (ObWriterUtils.isOracleMode()) { + // set up for writing timestamp columns + List sessionConfig = config.getList(Key.SESSION, new ArrayList(), String.class); + sessionConfig.add("ALTER SESSION SET NLS_DATE_FORMAT='YYYY-MM-DD HH24:MI:SS'"); + sessionConfig.add("ALTER SESSION SET NLS_TIMESTAMP_FORMAT='YYYY-MM-DD HH24:MI:SS.FF'"); + sessionConfig.add("ALTER SESSION SET NLS_TIMESTAMP_TZ_FORMAT='YYYY-MM-DD HH24:MI:SS.FF TZR TZD'"); + config.set(Key.SESSION, sessionConfig); + } + conn = DBUtil.getConnection(dbType, jdbcUrl, userName, password); + DBUtil.dealWithSessionConfig(conn, config, dbType, BASIC_MESSAGE); + return conn; + } + + @Override + public String getJdbcUrl() { + return jdbcUrl; + } + + @Override + public String getUserName() { + return userName; + } + + @Override + public void destroy() { + DBUtil.closeDBResources(null, conn); + } + + @Override + public void doCommit() { + try { + if (conn != null) { + conn.commit(); + } + } catch (SQLException e) { + throw new RuntimeException(e); + } + } } diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java index fe8889e167..714d731296 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java +++ 
b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java @@ -5,94 +5,112 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.alibaba.datax.common.util.Configuration; + public class ServerConnectInfo { - - public String clusterName; - public String tenantName; - public String userName; - public String password; - public String databaseName; - public String ipPort; - public String jdbcUrl; - public boolean publicCloud; - /** - * - * @param jdbcUrl format is jdbc:oceanbase//ip:port - * @param username format is cluster:tenant:username or username@tenant#cluster or user@tenant or user - * @param password - */ - public ServerConnectInfo(final String jdbcUrl, final String username, final String password) { - this.jdbcUrl = jdbcUrl; - this.password = password; - parseJdbcUrl(jdbcUrl); - parseFullUserName(username); - } + public String clusterName; + public String tenantName; + // userName doesn't contain tenantName or clusterName + public String userName; + public String password; + public String databaseName; + public String ipPort; + public String jdbcUrl; + public String host; + public String port; + public boolean publicCloud; + public int rpcPort; + public Configuration config; + + public ServerConnectInfo(final String jdbcUrl, final String username, final String password, Configuration config) { + this.jdbcUrl = jdbcUrl; + this.password = password; + this.config = config; + parseJdbcUrl(jdbcUrl); + parseFullUserName(username); + } + + private void parseJdbcUrl(final String jdbcUrl) { + Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([^\\\\?]*)"); + Matcher matcher = pattern.matcher(jdbcUrl); + if (matcher.find()) { + String ipPort = matcher.group(1); + String dbName = matcher.group(2); + this.ipPort = ipPort; + this.host = ipPort.split(":")[0]; + this.port = ipPort.split(":")[1]; + this.databaseName = dbName; + this.publicCloud = host.endsWith("aliyuncs.com"); + } else { + 
throw new RuntimeException("Invalid argument:" + jdbcUrl); + } + } - private void parseJdbcUrl(final String jdbcUrl) { - Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w-]+)\\?"); - Matcher matcher = pattern.matcher(jdbcUrl); - if (matcher.find()) { - String ipPort = matcher.group(1); - String dbName = matcher.group(2); - this.ipPort = ipPort; - this.databaseName = dbName; - this.publicCloud = ipPort.split(":")[0].endsWith("aliyuncs.com"); - } else { - throw new RuntimeException("Invalid argument:" + jdbcUrl); - } - } + protected void parseFullUserName(final String fullUserName) { + int tenantIndex = fullUserName.indexOf("@"); + int clusterIndex = fullUserName.indexOf("#"); + // 适用于jdbcUrl以||_dsc_ob10_dsc_开头的场景 + if (fullUserName.contains(":") && tenantIndex < 0) { + String[] names = fullUserName.split(":"); + if (names.length != 3) { + throw new RuntimeException("invalid argument: " + fullUserName); + } else { + this.clusterName = names[0]; + this.tenantName = names[1]; + this.userName = names[2]; + } + } else if (tenantIndex < 0) { + // 适用于short jdbcUrl,且username中不含租户名(主要是公有云场景,此场景下不计算分区) + this.userName = fullUserName; + this.clusterName = EMPTY; + this.tenantName = EMPTY; + } else { + // 适用于short jdbcUrl,且username中含租户名 + this.userName = fullUserName.substring(0, tenantIndex); + if (clusterIndex < 0) { + this.clusterName = EMPTY; + this.tenantName = fullUserName.substring(tenantIndex + 1); + } else { + this.clusterName = fullUserName.substring(clusterIndex + 1); + this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex); + } + } + } - private void parseFullUserName(final String fullUserName) { - int tenantIndex = fullUserName.indexOf("@"); - int clusterIndex = fullUserName.indexOf("#"); - if (fullUserName.contains(":") && tenantIndex < 0) { - String[] names = fullUserName.split(":"); - if (names.length != 3) { - throw new RuntimeException("invalid argument: " + fullUserName); - } else { - this.clusterName = names[0]; - 
this.tenantName = names[1]; - this.userName = names[2]; - } - } else if (!publicCloud || tenantIndex < 0) { - this.userName = tenantIndex < 0 ? fullUserName : fullUserName.substring(0, tenantIndex); - this.clusterName = clusterIndex < 0 ? EMPTY : fullUserName.substring(clusterIndex + 1); - this.tenantName = tenantIndex < 0 ? EMPTY : fullUserName.substring(tenantIndex + 1, clusterIndex); - } else { - // If in public cloud, the username with format user@tenant#cluster should be parsed, otherwise, connection can't be created. - this.userName = fullUserName.substring(0, tenantIndex); - if (clusterIndex > tenantIndex) { - this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex); - this.clusterName = fullUserName.substring(clusterIndex + 1); - } else { - this.tenantName = fullUserName.substring(tenantIndex + 1); - this.clusterName = EMPTY; - } - } - } + @Override + public String toString() { + return "ServerConnectInfo{" + + "clusterName='" + clusterName + '\'' + + ", tenantName='" + tenantName + '\'' + + ", userName='" + userName + '\'' + + ", password='" + password + '\'' + + ", databaseName='" + databaseName + '\'' + + ", ipPort='" + ipPort + '\'' + + ", jdbcUrl='" + jdbcUrl + '\'' + + ", host='" + host + '\'' + + ", publicCloud=" + publicCloud + + ", rpcPort=" + rpcPort + + '}'; + } - @Override - public String toString() { - StringBuffer strBuffer = new StringBuffer(); - return strBuffer.append("clusterName:").append(clusterName).append(", tenantName:").append(tenantName) - .append(", userName:").append(userName).append(", databaseName:").append(databaseName) - .append(", ipPort:").append(ipPort).append(", jdbcUrl:").append(jdbcUrl).toString(); - } + public String getFullUserName() { + StringBuilder builder = new StringBuilder(); + builder.append(userName); + if (!EMPTY.equals(tenantName)) { + builder.append("@").append(tenantName); + } - public String getFullUserName() { - StringBuilder builder = new StringBuilder(); - builder.append(userName); - if 
(!EMPTY.equals(tenantName)) { - builder.append("@").append(tenantName); - } + if (!EMPTY.equals(clusterName)) { + builder.append("#").append(clusterName); + } + if (EMPTY.equals(this.clusterName) && EMPTY.equals(this.tenantName)) { + return this.userName; + } + return builder.toString(); + } - if (!EMPTY.equals(clusterName)) { - builder.append("#").append(clusterName); - } - if (EMPTY.equals(this.clusterName) && EMPTY.equals(this.tenantName)) { - return this.userName; - } - return builder.toString(); - } + public void setRpcPort(int rpcPort) { + this.rpcPort = rpcPort; + } } diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/AbstractInsertTask.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/AbstractInsertTask.java new file mode 100644 index 0000000000..d4f215e11d --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/AbstractInsertTask.java @@ -0,0 +1,127 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.task; + +import java.util.List; +import java.util.Queue; +import java.util.concurrent.TimeUnit; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.Config; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.AbstractConnHolder; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ServerConnectInfo; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractInsertTask implements Runnable { + private static final Logger LOG = LoggerFactory.getLogger(AbstractInsertTask.class); + protected final long taskId; + protected ConcurrentTableWriterTask writerTask; + protected ConcurrentTableWriterTask.ConcurrentTableWriter writer; + protected Queue> queue; + protected boolean isStop; + protected Configuration config; + protected ServerConnectInfo 
connInfo; + protected AbstractConnHolder connHolder; + protected long totalCost = 0; + protected long insertCount = 0; + private boolean printCost = Config.DEFAULT_PRINT_COST; + private long costBound = Config.DEFAULT_COST_BOUND; + + public AbstractInsertTask(final long taskId, Queue> recordsQueue, Configuration config, ServerConnectInfo connectInfo, ConcurrentTableWriterTask task, ConcurrentTableWriterTask.ConcurrentTableWriter writer) { + this.taskId = taskId; + this.queue = recordsQueue; + this.config = config; + this.connInfo = connectInfo; + this.isStop = false; + this.printCost = config.getBool(Config.PRINT_COST, Config.DEFAULT_PRINT_COST); + this.costBound = config.getLong(Config.COST_BOUND, Config.DEFAULT_COST_BOUND); + this.writer = writer; + this.writerTask = task; + initConnHolder(); + } + + public AbstractInsertTask(final long taskId, Queue> recordsQueue, Configuration config, ServerConnectInfo connectInfo) { + this.taskId = taskId; + this.queue = recordsQueue; + this.config = config; + this.connInfo = connectInfo; + this.isStop = false; + this.printCost = config.getBool(Config.PRINT_COST, Config.DEFAULT_PRINT_COST); + this.costBound = config.getLong(Config.COST_BOUND, Config.DEFAULT_COST_BOUND); + initConnHolder(); + } + + protected abstract void initConnHolder(); + + public void setWriterTask(ConcurrentTableWriterTask writerTask) { + this.writerTask = writerTask; + } + + public void setWriter(ConcurrentTableWriterTask.ConcurrentTableWriter writer) { + this.writer = writer; + } + + private boolean isStop() { + return isStop; + } + + public void setStop() { + isStop = true; + } + + public AbstractConnHolder getConnHolder() { + return connHolder; + } + + public void calStatistic(final long cost) { + writer.increFinishCount(); + insertCount++; + totalCost += cost; + if (this.printCost && cost > this.costBound) { + LOG.info("slow multi insert cost {}ms", cost); + } + } + + @Override + public void run() { + 
Thread.currentThread().setName(String.format("%d-insertTask-%d", taskId, Thread.currentThread().getId())); + LOG.debug("Task {} start to execute...", taskId); + while (!isStop()) { + try { + List records = queue.poll(); + if (null != records) { + write(records); + } else if (writerTask.isFinished()) { + writerTask.singalTaskFinish(); + LOG.debug("not more task, thread exist ..."); + break; + } else { + TimeUnit.MILLISECONDS.sleep(5); + } + } catch (InterruptedException e) { + LOG.debug("TableWriter is interrupt"); + } catch (Exception e) { + LOG.warn("ERROR UNEXPECTED ", e); + break; + } + } + LOG.debug("Thread exist..."); + } + + protected abstract void write(List records); + + public long getTotalCost() { + return totalCost; + } + + public long getInsertCount() { + return insertCount; + } + + public void destroy() { + if (connHolder != null) { + connHolder.destroy(); + } + } +} diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java index 0ad3a1ed2f..dae31d1ab1 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java @@ -1,5 +1,6 @@ package com.alibaba.datax.plugin.writer.oceanbasev10writer.task; +import com.alibaba.datax.common.element.Column; import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordReceiver; @@ -17,10 +18,13 @@ import com.alibaba.datax.plugin.writer.oceanbasev10writer.part.ObPartitionCalculatorV1; import com.alibaba.datax.plugin.writer.oceanbasev10writer.part.ObPartitionCalculatorV2; import 
com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils; + import com.oceanbase.partition.calculator.enums.ObServerMode; + import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.SQLException; +import java.sql.Types; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -32,8 +36,10 @@ import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import static com.alibaba.datax.plugin.writer.oceanbasev10writer.Config.DEFAULT_SLOW_MEMSTORE_THRESHOLD; import static com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils.LoadMode.FAST; import static com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils.LoadMode.PAUSE; @@ -42,15 +48,15 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task { private static final Logger LOG = LoggerFactory.getLogger(ConcurrentTableWriterTask.class); - // memstore_total 与 memstore_limit 比例的阈值,一旦超过这个值,则暂停写入 - private double memstoreThreshold = Config.DEFAULT_MEMSTORE_THRESHOLD; - // memstore检查的间隔 - private long memstoreCheckIntervalSecond = Config.DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND; - // 最后一次检查 - private long lastCheckMemstoreTime; + // memstore_total 与 memstore_limit 比例的阈值,一旦超过这个值,则暂停写入 + private double memstoreThreshold = Config.DEFAULT_MEMSTORE_THRESHOLD; + // memstore检查的间隔 + private long memstoreCheckIntervalSecond = Config.DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND; + // 最后一次检查 + private long lastCheckMemstoreTime; + + private volatile ObWriterUtils.LoadMode loadMode = FAST; - private volatile ObWriterUtils.LoadMode loadMode = FAST; - private static AtomicLong totalTask = new AtomicLong(0); private long taskId = -1; private AtomicBoolean isMemStoreFull = new AtomicBoolean(false); @@ -67,38 +73,41 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task { private String 
obUpdateColumns = null; private String dbName; private int calPartFailedCount = 0; + private boolean directPath; - public ConcurrentTableWriterTask(DataBaseType dataBaseType) { - super(dataBaseType); - taskId = totalTask.getAndIncrement(); - } - - @Override - public void init(Configuration config) { - super.init(config); - // OceanBase 所有操作都是 insert into on duplicate key update 模式 - // writeMode应该使用enum来定义 - this.writeMode = "update"; + public ConcurrentTableWriterTask(DataBaseType dataBaseType) { + super(dataBaseType); + taskId = totalTask.getAndIncrement(); + } + + @Override + public void init(Configuration config) { + super.init(config); + this.directPath = config.getBool(Config.DIRECT_PATH, false); + // OceanBase 所有操作都是 insert into on duplicate key update 模式 + // writeMode应该使用enum来定义 + this.writeMode = "update"; obWriteMode = config.getString(Config.OB_WRITE_MODE, "update"); - ServerConnectInfo connectInfo = new ServerConnectInfo(jdbcUrl, username, password); - dbName = connectInfo.databaseName; - //init check memstore - this.memstoreThreshold = config.getDouble(Config.MEMSTORE_THRESHOLD, Config.DEFAULT_MEMSTORE_THRESHOLD); - this.memstoreCheckIntervalSecond = config.getLong(Config.MEMSTORE_CHECK_INTERVAL_SECOND, - Config.DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND); - - this.connHolder = new ObClientConnHolder(config, connectInfo.jdbcUrl, - connectInfo.getFullUserName(), connectInfo.password); - this.isOracleCompatibleMode = ObWriterUtils.isOracleMode(); - if (isOracleCompatibleMode) { - connectInfo.databaseName = connectInfo.databaseName.toUpperCase(); - //在转义的情况下不翻译 - if (!(table.startsWith("\"") && table.endsWith("\""))) { - table = table.toUpperCase(); - } - - LOG.info(String.format("this is oracle compatible mode, change database to %s, table to %s", - connectInfo.databaseName, table)); + ServerConnectInfo connectInfo = new ServerConnectInfo(jdbcUrl, username, password, config); + connectInfo.setRpcPort(config.getInt(Config.RPC_PORT, 0)); + dbName = 
connectInfo.databaseName; + //init check memstore + this.memstoreThreshold = config.getDouble(Config.MEMSTORE_THRESHOLD, Config.DEFAULT_MEMSTORE_THRESHOLD); + this.memstoreCheckIntervalSecond = config.getLong(Config.MEMSTORE_CHECK_INTERVAL_SECOND, + Config.DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND); + + this.connHolder = new ObClientConnHolder(config, connectInfo.jdbcUrl, + connectInfo.getFullUserName(), connectInfo.password); + this.isOracleCompatibleMode = ObWriterUtils.isOracleMode(); + if (isOracleCompatibleMode) { + connectInfo.databaseName = connectInfo.databaseName.toUpperCase(); + //在转义的情况下不翻译 + if (!(table.startsWith("\"") && table.endsWith("\""))) { + table = table.toUpperCase(); + } + + LOG.info(String.format("this is oracle compatible mode, change database to %s, table to %s", + connectInfo.databaseName, table)); } if (config.getBool(Config.USE_PART_CALCULATOR, Config.DEFAULT_USE_PART_CALCULATOR)) { @@ -133,37 +142,37 @@ private IObPartCalculator createPartitionCalculator(ServerConnectInfo connectInf return new ObPartitionCalculatorV1(connectInfo, table, columns); } - public boolean isFinished() { - return allTaskInQueue && concurrentWriter.checkFinish(); - } - - public boolean allTaskInQueue() { - return allTaskInQueue; - } - - public void setPutAllTaskInQueue() { - this.allTaskInQueue = true; - LOG.info("ConcurrentTableWriter has put all task in queue, queueSize = {}, total = {}, finished = {}", - concurrentWriter.getTaskQueueSize(), - concurrentWriter.getTotalTaskCount(), - concurrentWriter.getFinishTaskCount()); - } - - private void rewriteSql() { - Connection conn = connHolder.initConnection(); - if (isOracleCompatibleMode && obWriteMode.equalsIgnoreCase("update")) { - // change obWriteMode to insert so the insert statement will be generated. 
- obWriteMode = "insert"; - } - this.writeRecordSql = ObWriterUtils.buildWriteSql(table, columns, conn, obWriteMode, obUpdateColumns); - LOG.info("writeRecordSql :{}", this.writeRecordSql); - } + public boolean isFinished() { + return allTaskInQueue && concurrentWriter.checkFinish(); + } + + public boolean allTaskInQueue() { + return allTaskInQueue; + } + + public void setPutAllTaskInQueue() { + this.allTaskInQueue = true; + LOG.info("ConcurrentTableWriter has put all task in queue, queueSize = {}, total = {}, finished = {}", + concurrentWriter.getTaskQueueSize(), + concurrentWriter.getTotalTaskCount(), + concurrentWriter.getFinishTaskCount()); + } + + private void rewriteSql() { + Connection conn = connHolder.initConnection(); + if (isOracleCompatibleMode && obWriteMode.equalsIgnoreCase("update")) { + // change obWriteMode to insert so the insert statement will be generated. + obWriteMode = "insert"; + } + this.writeRecordSql = ObWriterUtils.buildWriteSql(table, columns, conn, obWriteMode, obUpdateColumns); + LOG.info("writeRecordSql :{}", this.writeRecordSql); + } @Override - public void prepare(Configuration writerSliceConfig) { - super.prepare(writerSliceConfig); - concurrentWriter.start(); - } + public void prepare(Configuration writerSliceConfig) { + super.prepare(writerSliceConfig); + concurrentWriter.start(); + } @Override public void startWriteWithConnection(RecordReceiver recordReceiver, TaskPluginCollector taskPluginCollector, Connection connection) { @@ -173,25 +182,25 @@ public void startWriteWithConnection(RecordReceiver recordReceiver, TaskPluginCo int retryTimes = 0; boolean needRetry = false; do { - try { - if (retryTimes > 0) { - TimeUnit.SECONDS.sleep((1 << retryTimes)); - DBUtil.closeDBResources(null, connection); - connection = DBUtil.getConnection(dataBaseType, jdbcUrl, username, password); - LOG.warn("getColumnMetaData of table {} failed, retry the {} times ...", this.table, retryTimes); - } - ColumnMetaCache.init(connection, this.table, 
this.columns); - this.resultSetMetaData = ColumnMetaCache.getColumnMeta(); - needRetry = false; - } catch (SQLException e) { - needRetry = true; - ++retryTimes; - e.printStackTrace(); - LOG.warn("fetch column meta of [{}] failed..., retry {} times", this.table, retryTimes); - } catch (InterruptedException e) { - LOG.warn("startWriteWithConnection interrupt, ignored"); - } finally { - } + try { + if (retryTimes > 0) { + TimeUnit.SECONDS.sleep((1 << retryTimes)); + DBUtil.closeDBResources(null, connection); + connection = DBUtil.getConnection(dataBaseType, jdbcUrl, username, password); + LOG.warn("getColumnMetaData of table {} failed, retry the {} times ...", this.table, retryTimes); + } + ColumnMetaCache.init(connection, this.table, this.columns); + this.resultSetMetaData = ColumnMetaCache.getColumnMeta(); + needRetry = false; + } catch (SQLException e) { + needRetry = true; + ++retryTimes; + e.printStackTrace(); + LOG.warn("fetch column meta of [{}] failed..., retry {} times", this.table, retryTimes); + } catch (InterruptedException e) { + LOG.warn("startWriteWithConnection interrupt, ignored"); + } finally { + } } while (needRetry && retryTimes < 100); try { @@ -200,8 +209,8 @@ public void startWriteWithConnection(RecordReceiver recordReceiver, TaskPluginCo while ((record = recordReceiver.getFromReader()) != null) { if (record.getColumnNumber() != this.columnNumber) { // 源头读取字段列数与目的表字段写入列数不相等,直接报错 - LOG.error("column not equal {} != {}, record = {}", - this.columnNumber, record.getColumnNumber(), record.toString()); + LOG.error("column not equal {} != {}, record = {}", + this.columnNumber, record.getColumnNumber(), record.toString()); throw DataXException .asDataXException( DBUtilErrorCode.CONF_ERROR, @@ -221,231 +230,408 @@ public void startWriteWithConnection(RecordReceiver recordReceiver, TaskPluginCo } } - public PreparedStatement fillStatement(PreparedStatement preparedStatement, Record record) - throws SQLException { - return 
fillPreparedStatement(preparedStatement, record); - } - - private void addLeftRecords() { - //不需要刷新Cache,已经是最后一批数据了 - for (List groupValues : groupInsertValues.values()) { - if (groupValues.size() > 0 ) { - addRecordsToWriteQueue(groupValues); - } - } - } - - private void addRecordToCache(final Record record) { - Long partId =null; - try { - partId = obPartCalculator == null ? Long.MAX_VALUE : obPartCalculator.calculate(record); - } catch (Exception e1) { - if (calPartFailedCount++ < 10) { - LOG.warn("fail to get partition id: " + e1.getMessage() + ", record: " + record); - } - } + public PreparedStatement fillStatement(PreparedStatement preparedStatement, Record record) + throws SQLException { + return fillPreparedStatement(preparedStatement, record); + } + + private void addLeftRecords() { + //不需要刷新Cache,已经是最后一批数据了 + for (List groupValues : groupInsertValues.values()) { + if (groupValues.size() > 0) { + addRecordsToWriteQueue(groupValues); + } + } + } + + private void addRecordToCache(final Record record) { + Long partId = null; + try { + partId = obPartCalculator == null ? 
Long.MAX_VALUE : obPartCalculator.calculate(record); + } catch (Exception e1) { + if (calPartFailedCount++ < 10) { + LOG.warn("fail to get partition id: " + e1.getMessage() + ", record: " + record); + } + } if (partId == null) { LOG.debug("fail to calculate parition id, just put into the default buffer."); partId = Long.MAX_VALUE; } - List groupValues = groupInsertValues.computeIfAbsent(partId, k -> new ArrayList(batchSize)); - groupValues.add(record); - if (groupValues.size() >= batchSize) { - groupValues = addRecordsToWriteQueue(groupValues); - groupInsertValues.put(partId, groupValues); - } - } - - /** - * - * @param records - * @return 返回一个新的Cache用于存储接下来的数据 - */ - private List addRecordsToWriteQueue(List records) { - int i = 0; - while (true) { - if (i > 0) { - LOG.info("retry add batch record the {} times", i); - } - try { - concurrentWriter.addBatchRecords(records); - break; - } catch (InterruptedException e) { - i++; - LOG.info("Concurrent table writer is interrupted"); - } - } - return new ArrayList(batchSize); - } - private void checkMemStore() { - Connection checkConn = connHolder.getConn(); - try { - if (checkConn == null || checkConn.isClosed()) { - checkConn = connHolder.reconnect(); - } - }catch (Exception e) { - LOG.warn("Check connection is unusable"); - } - - long now = System.currentTimeMillis(); - if (now - lastCheckMemstoreTime < 1000 * memstoreCheckIntervalSecond) { - return; - } - double memUsedRatio = ObWriterUtils.queryMemUsedRatio(checkConn); - if (memUsedRatio >= DEFAULT_SLOW_MEMSTORE_THRESHOLD) { - this.loadMode = memUsedRatio >= memstoreThreshold ? PAUSE : SLOW; - LOG.info("Memstore used ration is {}. 
Load data {}", memUsedRatio, loadMode.name()); - }else { - this.loadMode = FAST; - } - lastCheckMemstoreTime = now; - } - - public boolean isMemStoreFull() { - return isMemStoreFull.get(); - } - - public boolean isShouldPause() { - return this.loadMode.equals(PAUSE); - } - - public boolean isShouldSlow() { - return this.loadMode.equals(SLOW); - } - - public void print() { - if (LOG.isDebugEnabled()) { - LOG.debug("Statistic total task {}, finished {}, queue Size {}", - concurrentWriter.getTotalTaskCount(), - concurrentWriter.getFinishTaskCount(), - concurrentWriter.getTaskQueueSize()); - concurrentWriter.printStatistics(); - } - } - - public void waitTaskFinish() { - setPutAllTaskInQueue(); - lock.lock(); - try { - while (!concurrentWriter.checkFinish()) { - condition.await(15, TimeUnit.SECONDS); - print(); - checkMemStore(); - } - } catch (InterruptedException e) { - LOG.warn("Concurrent table writer wait task finish interrupt"); - } finally { - lock.unlock(); - } - LOG.debug("wait all InsertTask finished ..."); - } - - public void singalTaskFinish() { - lock.lock(); - condition.signal(); - lock.unlock(); - } - - @Override - public void destroy(Configuration writerSliceConfig) { - if(concurrentWriter!=null) { - concurrentWriter.destory(); - } - // 把本级持有的conn关闭掉 - DBUtil.closeDBResources(null, connHolder.getConn()); - super.destroy(writerSliceConfig); - } - - public class ConcurrentTableWriter { - private BlockingQueue> queue; - private List insertTasks; - private Configuration config; - private ServerConnectInfo connectInfo; - private String rewriteRecordSql; - private AtomicLong totalTaskCount; - private AtomicLong finishTaskCount; - private final int threadCount; - - public ConcurrentTableWriter(Configuration config, ServerConnectInfo connInfo, String rewriteRecordSql) { - threadCount = config.getInt(Config.WRITER_THREAD_COUNT, Config.DEFAULT_WRITER_THREAD_COUNT); - queue = new LinkedBlockingQueue>(threadCount << 1); - insertTasks = new ArrayList(threadCount); - 
this.config = config; - this.connectInfo = connInfo; - this.rewriteRecordSql = rewriteRecordSql; - this.totalTaskCount = new AtomicLong(0); - this.finishTaskCount = new AtomicLong(0); - } - - public long getTotalTaskCount() { - return totalTaskCount.get(); - } - - public long getFinishTaskCount() { - return finishTaskCount.get(); - } - - public int getTaskQueueSize() { - return queue.size(); - } - - public void increFinishCount() { - finishTaskCount.incrementAndGet(); - } - - //should check after put all the task in the queue - public boolean checkFinish() { - long finishCount = finishTaskCount.get(); - long totalCount = totalTaskCount.get(); - return finishCount == totalCount; - } - - public synchronized void start() { - for (int i = 0; i < threadCount; ++i) { - LOG.info("start {} insert task.", (i+1)); - InsertTask insertTask = new InsertTask(taskId, queue, config, connectInfo, rewriteRecordSql); - insertTask.setWriterTask(ConcurrentTableWriterTask.this); - insertTask.setWriter(this); - insertTasks.add(insertTask); - } - WriterThreadPool.executeBatch(insertTasks); - } - - public void printStatistics() { - long insertTotalCost = 0; - long insertTotalCount = 0; - for (InsertTask task: insertTasks) { - insertTotalCost += task.getTotalCost(); - insertTotalCount += task.getInsertCount(); - } - long avgCost = 0; - if (insertTotalCount != 0) { - avgCost = insertTotalCost / insertTotalCount; - } - ConcurrentTableWriterTask.LOG.debug("Insert {} times, totalCost {} ms, average {} ms", - insertTotalCount, insertTotalCost, avgCost); - } - - public void addBatchRecords(final List records) throws InterruptedException { - boolean isSucc = false; - while (!isSucc) { - isSucc = queue.offer(records, 5, TimeUnit.MILLISECONDS); - checkMemStore(); - } - totalTaskCount.incrementAndGet(); - } - - public synchronized void destory() { - if (insertTasks != null) { - for(InsertTask task : insertTasks) { - task.setStop(); - } - for(InsertTask task: insertTasks) { - task.destroy(); - } - } - 
} - } + List groupValues = groupInsertValues.computeIfAbsent(partId, k -> new ArrayList(batchSize)); + groupValues.add(record); + if (groupValues.size() >= batchSize) { + groupValues = addRecordsToWriteQueue(groupValues); + groupInsertValues.put(partId, groupValues); + } + } + + /** + * @param records + * @return 返回一个新的Cache用于存储接下来的数据 + */ + private List addRecordsToWriteQueue(List records) { + int i = 0; + while (true) { + if (i > 0) { + LOG.info("retry add batch record the {} times", i); + } + try { + concurrentWriter.addBatchRecords(records); + break; + } catch (InterruptedException e) { + i++; + LOG.info("Concurrent table writer is interrupted"); + } + } + return new ArrayList(batchSize); + } + + private void checkMemStore() { + Connection checkConn = connHolder.getConn(); + try { + if (checkConn == null || checkConn.isClosed()) { + checkConn = connHolder.reconnect(); + } + } catch (Exception e) { + LOG.warn("Check connection is unusable"); + } + + long now = System.currentTimeMillis(); + if (now - lastCheckMemstoreTime < 1000 * memstoreCheckIntervalSecond) { + return; + } + double memUsedRatio = ObWriterUtils.queryMemUsedRatio(checkConn); + if (memUsedRatio >= DEFAULT_SLOW_MEMSTORE_THRESHOLD) { + this.loadMode = memUsedRatio >= memstoreThreshold ? PAUSE : SLOW; + LOG.info("Memstore used ration is {}. 
Load data {}", memUsedRatio, loadMode.name()); + } else { + this.loadMode = FAST; + } + lastCheckMemstoreTime = now; + } + + public boolean isMemStoreFull() { + return isMemStoreFull.get(); + } + + public boolean isShouldPause() { + return this.loadMode.equals(PAUSE); + } + + public boolean isShouldSlow() { + return this.loadMode.equals(SLOW); + } + + public void print() { + if (LOG.isDebugEnabled()) { + LOG.debug("Statistic total task {}, finished {}, queue Size {}", + concurrentWriter.getTotalTaskCount(), + concurrentWriter.getFinishTaskCount(), + concurrentWriter.getTaskQueueSize()); + concurrentWriter.printStatistics(); + } + } + + public void waitTaskFinish() { + setPutAllTaskInQueue(); + lock.lock(); + try { + while (!concurrentWriter.checkFinish()) { + condition.await(15, TimeUnit.SECONDS); + print(); + checkMemStore(); + } + if (directPath){ + concurrentWriter.doCommit(); + } + } catch (InterruptedException e) { + LOG.warn("Concurrent table writer wait task finish interrupt"); + } finally { + lock.unlock(); + } + LOG.debug("wait all InsertTask finished ..."); + } + + public void singalTaskFinish() { + lock.lock(); + condition.signal(); + lock.unlock(); + } + + @Override + public void destroy(Configuration writerSliceConfig) { + if (concurrentWriter != null) { + concurrentWriter.destory(); + } + // 把本级持有的conn关闭掉 + DBUtil.closeDBResources(null, connHolder.getConn()); + super.destroy(writerSliceConfig); + } + + public class ConcurrentTableWriter { + private BlockingQueue> queue; + private List abstractInsertTasks; + private Configuration config; + private ServerConnectInfo connectInfo; + private String rewriteRecordSql; + private AtomicLong totalTaskCount; + private AtomicLong finishTaskCount; + private final int threadCount; + + public ConcurrentTableWriter(Configuration config, ServerConnectInfo connInfo, String rewriteRecordSql) { + threadCount = config.getInt(Config.WRITER_THREAD_COUNT, Config.DEFAULT_WRITER_THREAD_COUNT); + queue = new 
LinkedBlockingQueue>(threadCount << 1); + abstractInsertTasks = new ArrayList(threadCount); + this.config = config; + this.connectInfo = connInfo; + this.rewriteRecordSql = rewriteRecordSql; + this.totalTaskCount = new AtomicLong(0); + this.finishTaskCount = new AtomicLong(0); + } + + public long getTotalTaskCount() { + return totalTaskCount.get(); + } + + public long getFinishTaskCount() { + return finishTaskCount.get(); + } + + public int getTaskQueueSize() { + return queue.size(); + } + + public void increFinishCount() { + finishTaskCount.incrementAndGet(); + } + + //should check after put all the task in the queue + public boolean checkFinish() { + long finishCount = finishTaskCount.get(); + long totalCount = totalTaskCount.get(); + return finishCount == totalCount; + } + + public synchronized void start() { + for (int i = 0; i < threadCount; ++i) { + LOG.info("start {} insert task.", (i + 1)); + AbstractInsertTask insertTask = null; + if (directPath) { + insertTask = new DirectPathInsertTask(taskId, queue, config, connectInfo, ConcurrentTableWriterTask.this, this); + } else { + insertTask = new InsertTask(taskId, queue, config, connectInfo, rewriteRecordSql); + } + insertTask.setWriterTask(ConcurrentTableWriterTask.this); + insertTask.setWriter(this); + abstractInsertTasks.add(insertTask); + } + WriterThreadPool.executeBatch(abstractInsertTasks); + } + + public void doCommit() { + this.abstractInsertTasks.get(0).getConnHolder().doCommit(); + } + + public int getThreadCount() { + return threadCount; + } + + public void printStatistics() { + long insertTotalCost = 0; + long insertTotalCount = 0; + for (AbstractInsertTask task : abstractInsertTasks) { + insertTotalCost += task.getTotalCost(); + insertTotalCount += task.getInsertCount(); + } + long avgCost = 0; + if (insertTotalCount != 0) { + avgCost = insertTotalCost / insertTotalCount; + } + ConcurrentTableWriterTask.LOG.debug("Insert {} times, totalCost {} ms, average {} ms", + insertTotalCount, 
insertTotalCost, avgCost); + } + + public void addBatchRecords(final List records) throws InterruptedException { + boolean isSucc = false; + while (!isSucc) { + isSucc = queue.offer(records, 5, TimeUnit.MILLISECONDS); + checkMemStore(); + } + totalTaskCount.incrementAndGet(); + } + + public synchronized void destory() { + if (abstractInsertTasks != null) { + for (AbstractInsertTask task : abstractInsertTasks) { + task.setStop(); + } + for (AbstractInsertTask task : abstractInsertTasks) { + task.destroy(); + } + } + } + } + + public String getTable() { + return table; + } + + // 直接使用了两个类变量:columnNumber,resultSetMetaData + protected PreparedStatement fillPreparedStatement(PreparedStatement preparedStatement, Record record) + throws SQLException { + for (int i = 0; i < this.columnNumber; i++) { + int columnSqltype = this.resultSetMetaData.getMiddle().get(i); + String typeName = this.resultSetMetaData.getRight().get(i); + preparedStatement = fillPreparedStatementColumnType(preparedStatement, i, columnSqltype, typeName, record.getColumn(i)); + } + + return preparedStatement; + } + + protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, + int columnSqltype, String typeName, Column column) throws SQLException { + java.util.Date utilDate; + switch (columnSqltype) { + case Types.CHAR: + case Types.NCHAR: + case Types.CLOB: + case Types.NCLOB: + case Types.VARCHAR: + case Types.LONGVARCHAR: + case Types.NVARCHAR: + case Types.LONGNVARCHAR: + preparedStatement.setString(columnIndex + 1, column + .asString()); + break; + + case Types.SMALLINT: + case Types.INTEGER: + case Types.BIGINT: + case Types.NUMERIC: + case Types.DECIMAL: + case Types.FLOAT: + case Types.REAL: + case Types.DOUBLE: + String strValue = column.asString(); + if (emptyAsNull && "".equals(strValue)) { + preparedStatement.setString(columnIndex + 1, null); + } else { + preparedStatement.setString(columnIndex + 1, strValue); + } + break; + + //tinyint 
is a little special in some database like mysql {boolean->tinyint(1)} + case Types.TINYINT: + Long longValue = column.asLong(); + if (null == longValue) { + preparedStatement.setString(columnIndex + 1, null); + } else { + preparedStatement.setString(columnIndex + 1, longValue.toString()); + } + break; + + // for mysql bug, see http://bugs.mysql.com/bug.php?id=35115 + case Types.DATE: + if (typeName == null) { + typeName = this.resultSetMetaData.getRight().get(columnIndex); + } + + if (typeName.equalsIgnoreCase("year")) { + if (column.asBigInteger() == null) { + preparedStatement.setString(columnIndex + 1, null); + } else { + preparedStatement.setInt(columnIndex + 1, column.asBigInteger().intValue()); + } + } else { + java.sql.Date sqlDate = null; + try { + utilDate = column.asDate(); + } catch (DataXException e) { + throw new SQLException(String.format( + "Date 类型转换错误:[%s]", column)); + } + + if (null != utilDate) { + sqlDate = new java.sql.Date(utilDate.getTime()); + } + preparedStatement.setDate(columnIndex + 1, sqlDate); + } + break; + + case Types.TIME: + java.sql.Time sqlTime = null; + try { + utilDate = column.asDate(); + } catch (DataXException e) { + throw new SQLException(String.format( + "TIME 类型转换错误:[%s]", column)); + } + + if (null != utilDate) { + sqlTime = new java.sql.Time(utilDate.getTime()); + } + preparedStatement.setTime(columnIndex + 1, sqlTime); + break; + + case Types.TIMESTAMP: + java.sql.Timestamp sqlTimestamp = null; + try { + utilDate = column.asDate(); + } catch (DataXException e) { + throw new SQLException(String.format( + "TIMESTAMP 类型转换错误:[%s]", column)); + } + + if (null != utilDate) { + sqlTimestamp = new java.sql.Timestamp( + utilDate.getTime()); + } + preparedStatement.setTimestamp(columnIndex + 1, sqlTimestamp); + break; + case Types.VARBINARY: + case Types.BLOB: + case Types.LONGVARBINARY: + preparedStatement.setBytes(columnIndex + 1, column + .asBytes()); + break; + case Types.BINARY: + String isArray = 
column.getRawData().toString(); + if (isArray.startsWith("[") && isArray.endsWith("]")) { + preparedStatement.setString(columnIndex + 1, column + .asString()); + } else { + preparedStatement.setBytes(columnIndex + 1, column + .asBytes()); + } + break; + case Types.BOOLEAN: + preparedStatement.setBoolean(columnIndex + 1, column.asBoolean()); + break; + + // warn: bit(1) -> Types.BIT 可使用setBoolean + // warn: bit(>1) -> Types.VARBINARY 可使用setBytes + case Types.BIT: + if (this.dataBaseType == DataBaseType.MySql) { + preparedStatement.setBoolean(columnIndex + 1, column.asBoolean()); + } else { + preparedStatement.setString(columnIndex + 1, column.asString()); + } + break; + default: + throw DataXException + .asDataXException( + DBUtilErrorCode.UNSUPPORTED_TYPE, + String.format( + "您的配置文件中的列配置信息有误. 因为DataX 不支持数据库写入这种字段类型. 字段名:[%s], 字段类型:[%d], 字段Java类型:[%s]. 请修改表中该字段的类型或者不同步该字段.", + this.resultSetMetaData.getLeft() + .get(columnIndex), + this.resultSetMetaData.getMiddle() + .get(columnIndex), + this.resultSetMetaData.getRight() + .get(columnIndex))); + } + return preparedStatement; + } } diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/DirectPathInsertTask.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/DirectPathInsertTask.java new file mode 100644 index 0000000000..185321580e --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/DirectPathInsertTask.java @@ -0,0 +1,76 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.task; + +import java.text.MessageFormat; +import java.util.Arrays; +import java.util.List; +import java.util.Queue; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Column.Type; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.util.Configuration; +import 
com.alibaba.datax.plugin.writer.oceanbasev10writer.common.Table; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.common.TableCache; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath.DirectPathConnection; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath.DirectPathPreparedStatement; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.DirectPathConnHolder; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ServerConnectInfo; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DirectPathInsertTask extends AbstractInsertTask { + private static final Logger LOG = LoggerFactory.getLogger(DirectPathInsertTask.class); + + public DirectPathInsertTask(long taskId, Queue> recordsQueue, Configuration config, ServerConnectInfo connectInfo, ConcurrentTableWriterTask task, ConcurrentTableWriterTask.ConcurrentTableWriter writer) { + super(taskId, recordsQueue, config, connectInfo, task, writer); + } + + @Override + protected void initConnHolder() { + this.connHolder = new DirectPathConnHolder(config, connInfo, writerTask.getTable(), writer.getThreadCount()); + this.connHolder.initConnection(); + } + + @Override + protected void write(List records) { + Table table = TableCache.getInstance().getTable(connInfo.databaseName, writerTask.getTable()); + if (Table.Status.FAILURE.equals(table.getStatus())) { + return; + } + DirectPathConnection conn = (DirectPathConnection) connHolder.getConn(); + if (records != null && !records.isEmpty()) { + long startTime = System.currentTimeMillis(); + try (DirectPathPreparedStatement stmt = conn.createStatement()) { + final int columnNumber = records.get(0).getColumnNumber(); + Object[] values = new Object[columnNumber]; + for (Record record : records) { + for (int i = 0; i < columnNumber; i++) { + Column column = record.getColumn(i); + //处理一下时间类型 + if (column.getType().equals(Type.DATE)) { + values[i] = record.getColumn(i).asString(); + } else { + 
values[i] = record.getColumn(i).getRawData(); + } + } + stmt.addBatch(values); + } + + int[] result = stmt.executeBatch(); + + if (LOG.isDebugEnabled()) { + LOG.debug("[{}] Insert {} rows success", Thread.currentThread().getName(), Arrays.stream(result).sum()); + } + calStatistic(System.currentTimeMillis() - startTime); + stmt.clearBatch(); + } catch (Throwable ex) { + String msg = MessageFormat.format("Insert data into table \"{0}\" failed. Error: {1}", writerTask.getTable(), ex.getMessage()); + LOG.error(msg, ex); + table.setError(ex); + table.setStatus(Table.Status.FAILURE); + throw new RuntimeException(msg); + } + } + } +} \ No newline at end of file diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java index df80cf7ff2..1524d76f31 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java @@ -20,7 +20,7 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.TimeUnit; -public class InsertTask implements Runnable { +public class InsertTask extends AbstractInsertTask implements Runnable { private static final Logger LOG = LoggerFactory.getLogger(InsertTask.class); @@ -49,6 +49,7 @@ public InsertTask( Configuration config, ServerConnectInfo connectInfo, String writeRecordSql) { + super(taskId, recordsQueue, config, connectInfo); this.taskId = taskId; this.queue = recordsQueue; this.connInfo = connectInfo; @@ -62,11 +63,15 @@ public InsertTask( connHolder.initConnection(); } - void setWriterTask(ConcurrentTableWriterTask writerTask) { + protected void initConnHolder() { + + } + + public void setWriterTask(ConcurrentTableWriterTask writerTask) { this.writerTask = writerTask; } - void 
setWriter(ConcurrentTableWriter writer) { + public void setWriter(ConcurrentTableWriter writer) { this.writer = writer; } @@ -109,6 +114,10 @@ public void run() { LOG.debug("Thread exist..."); } + protected void write(List records) { + + } + public void destroy() { connHolder.destroy(); } diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/WriterThreadPool.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/WriterThreadPool.java index 8add538259..90657373e9 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/WriterThreadPool.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/WriterThreadPool.java @@ -4,6 +4,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import org.checkerframework.checker.units.qual.A; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,8 +30,8 @@ public static synchronized void execute(InsertTask task) { executorService.execute(task); } - public static synchronized void executeBatch(List tasks) { - for (InsertTask task : tasks) { + public static synchronized void executeBatch(List tasks) { + for (AbstractInsertTask task : tasks) { executorService.execute(task); } } diff --git a/package.xml b/package.xml index e51c11e1d5..7e122a2705 100644 --- a/package.xml +++ b/package.xml @@ -39,6 +39,13 @@ datax + + obhbasereader/target/datax/ + + **/*.* + + datax + drdsreader/target/datax/ @@ -250,6 +257,13 @@ datax + + gaussdbreader/target/datax/ + + **/*.* + + datax + @@ -476,6 +490,13 @@ datax + + obhbasewriter/target/datax/ + + **/*.* + + datax + gdbwriter/target/datax/ @@ -532,5 +553,19 @@ datax + + gaussdbwriter/target/datax/ + + **/*.* + + datax + + + milvuswriter/target/datax/ + + **/*.* + + datax + diff --git a/plugin-rdbms-util/pom.xml b/plugin-rdbms-util/pom.xml index c49f64af16..6dc69e06c6 100755 --- 
a/plugin-rdbms-util/pom.xml +++ b/plugin-rdbms-util/pom.xml @@ -33,6 +33,17 @@ ${mysql.driver.version} test + + com.oceanbase + oceanbase-client + 2.4.11 + + + com.google.guava + guava + + + org.slf4j slf4j-api diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ObVersion.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ObVersion.java index 0eb34feb0d..da078df924 100644 --- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ObVersion.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ObVersion.java @@ -16,6 +16,8 @@ public class ObVersion implements Comparable { private int patchNumber; public static final ObVersion V2276 = valueOf("2.2.76"); + public static final ObVersion V2252 = valueOf("2.2.52"); + public static final ObVersion V3 = valueOf("3.0.0.0"); public static final ObVersion V4000 = valueOf("4.0.0.0"); private static final ObVersion DEFAULT_VERSION = diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/SplitedSlice.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/SplitedSlice.java new file mode 100644 index 0000000000..d8de129490 --- /dev/null +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/SplitedSlice.java @@ -0,0 +1,37 @@ +package com.alibaba.datax.plugin.rdbms.util; + +public class SplitedSlice { + private String begin; + private String end; + private String range; + + public SplitedSlice(String begin, String end, String range) { + this.begin = begin; + this.end = end; + this.range = range; + } + + public String getBegin() { + return begin; + } + + public void setBegin(String begin) { + this.begin = begin; + } + + public String getEnd() { + return end; + } + + public void setEnd(String end) { + this.end = end; + } + + public String getRange() { + return range; + } + + public void setRange(String range) { + this.range = range; + } +} diff --git 
a/pom.xml b/pom.xml index eeb4bfaffe..1b364a7546 100644 --- a/pom.xml +++ b/pom.xml @@ -55,6 +55,7 @@ oraclereader cassandrareader oceanbasev10reader + obhbasereader rdbmsreader odpsreader @@ -93,6 +94,7 @@ kingbaseeswriter adswriter oceanbasev10writer + obhbasewriter adbpgwriter hologresjdbcwriter rdbmswriter @@ -127,6 +129,7 @@ adbmysqlwriter sybasewriter neo4jwriter + milvuswriter plugin-rdbms-util plugin-unstructured-storage-util diff --git a/selectdbwriter/doc/stream2selectdb.json b/selectdbwriter/doc/stream2selectdb.json index d5e14c4884..0c5be6dc0a 100644 --- a/selectdbwriter/doc/stream2selectdb.json +++ b/selectdbwriter/doc/stream2selectdb.json @@ -1,92 +1,61 @@ { - "core":{ - "transport":{ - "channel":{ - "speed":{ - "byte":10485760 + "core": { + "transport": { + "channel": { + "speed": { + "byte": 10485760 } } } }, - "job":{ - "content":[ + "job": { + "content": [ { - "reader":{ - "name":"streamreader", - "parameter":{ - "column":[ - { - "type":"string", - "value":"DataX" - }, - { - "type":"int", - "value":19890604 - }, - { - "type":"date", - "value":"1989-06-04 00:00:00" - }, - { - "type":"bool", - "value":true - }, - { - "type":"string", - "value":"test" - } - ], - "sliceRecordCount":1000000 - } - }, - "writer":{ - "name":"selectdbwriter", - "parameter":{ - "loadUrl":[ + "reader": {}, + "writer": { + "name": "selectdbwriter", + "parameter": { + "loadUrl": [ "xxx:35871" ], - "loadProps":{ - "file.type":"json", - "file.strip_outer_array":"true" + "loadProps": { + "file.type": "json", + "file.strip_outer_array": "true" }, - "database":"db1", - "column":[ + "database": "db1", + "column": [ "k1", "k2", "k3", "k4", "k5" ], - "username":"admin", - "password":"SelectDB2022", - "postSql":[ - - ], - "preSql":[ - - ], - "connection":[ + "username": "admin", + "password": "SelectDB2022", + "postSql": [], + "preSql": [], + "connection": [ { - "jdbcUrl":"jdbc:mysql://xxx:32386/cl_test", - "table":[ + "jdbcUrl": "jdbc:mysql://xxx:32386/cl_test", + "table": [ 
"test_selectdb" ], - "selectedDatabase":"cl_test" + "selectedDatabase": "cl_test" } ], - "maxBatchRows":200000, - "batchSize":53687091200 + "maxBatchRows": 200000, + "batchSize": 53687091200 } } } ], - "setting":{ - "errorLimit":{ - "percentage":0.02, - "record":0 + "setting": { + "errorLimit": { + "percentage": 0.02, + "record": 0 }, - "speed":{ - "byte":10485760 + "speed": { + "byte": 10485760 } } } diff --git a/transformer/doc/transformer.md b/transformer/doc/transformer.md index 0a00dbaa9c..a9da83a4ed 100644 --- a/transformer/doc/transformer.md +++ b/transformer/doc/transformer.md @@ -47,7 +47,7 @@ dx_replace(1,"5","10","****") column 1的value为“dataxTest”=>"datax****" 4. dx_filter (关联filter暂不支持,即多个字段的联合判断,函参太过复杂,用户难以使用。) * 参数: * 第一个参数:字段编号,对应record中第几个字段。 - * 第二个参数:运算符,支持一下运算符:like, not like, >, =, <, >=, !=, <= + * 第二个参数:运算符,支持以下运算符:like, not like, >, =, <, >=, !=, <= * 第三个参数:正则表达式(java正则表达式)、值。 * 返回: * 如果匹配正则表达式,返回Null,表示过滤该行。不匹配表达式时,表示保留该行。(注意是该行)。对于>=<都是对字段直接compare的结果. @@ -145,11 +145,11 @@ String code3 = "Column column = record.getColumn(1);\n" + "type": "string" }, { - "value": 19890604, + "value": 1724154616370, "type": "long" }, { - "value": "1989-06-04 00:00:00", + "value": "2024-01-01 00:00:00", "type": "date" }, { @@ -157,11 +157,11 @@ String code3 = "Column column = record.getColumn(1);\n" + "type": "bool" }, { - "value": "test", + "value": "TestRawData", "type": "bytes" } ], - "sliceRecordCount": 100000 + "sliceRecordCount": 100 } }, "writer": { @@ -174,38 +174,44 @@ String code3 = "Column column = record.getColumn(1);\n" + "transformer": [ { "name": "dx_substr", - "parameter": - { - "columnIndex":5, - "paras":["1","3"] - } + "parameter": { + "columnIndex": 5, + "paras": [ + "1", + "3" + ] + } }, { "name": "dx_replace", - "parameter": - { - "columnIndex":4, - "paras":["3","4","****"] - } + "parameter": { + "columnIndex": 4, + "paras": [ + "3", + "4", + "****" + ] + } }, { "name": "dx_digest", - "parameter": - { - "columnIndex":3, - "paras":["md5", 
"toLowerCase"] - } + "parameter": { + "columnIndex": 3, + "paras": [ + "md5", + "toLowerCase" + ] + } }, { "name": "dx_groovy", - "parameter": - { - "code": "//groovy code//", - "extraPackage":[ - "import somePackage1;", - "import somePackage2;" - ] - } + "parameter": { + "code": "//groovy code//", + "extraPackage": [ + "import somePackage1;", + "import somePackage2;" + ] + } } ] }
+ * Job类init-->prepare-->split + * + * Task类init-->prepare-->startWrite-->post-->destroy + * Task类init-->prepare-->startWrite-->post-->destroy + * + * Job类post-->destroy + *
+ * Map.put(1, "abc"); + * Map.put(5, "def"); // Error: parameter index is 5, but 2 values exists. + */ + private void checkRange() { + OptionalInt optionalInt = parameters.keySet().stream().mapToInt(e -> e).max(); + int parameterIndex = optionalInt.orElseThrow(() -> new IllegalArgumentException("No parameter index found")); + checkArgument(parameterIndex == parameters.size(), "Parameter index(%s) is unmatched with value list(%s)", parameterIndex, parameters.size()); + } +} \ No newline at end of file diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/ObTableDirectLoad.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/ObTableDirectLoad.java new file mode 100644 index 0000000000..859fcedd16 --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/directPath/ObTableDirectLoad.java @@ -0,0 +1,154 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath; + +import java.sql.SQLException; +import java.util.Objects; + +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadBucket; +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadConnection; +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadStatement; +import com.alipay.oceanbase.rpc.direct_load.ObDirectLoadTraceId; +import com.alipay.oceanbase.rpc.direct_load.exception.ObDirectLoadException; +import com.alipay.oceanbase.rpc.direct_load.protocol.payload.ObTableLoadClientStatus; +import com.alipay.oceanbase.rpc.table.ObTable; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Wrapper of the direct-load API for OceanBase. 
+ */ +public class ObTableDirectLoad implements AutoCloseable { + private static final Logger LOG = LoggerFactory.getLogger(ObTableDirectLoad.class); + + private final String tableName; + private final String schemaTableName; + private final ObDirectLoadStatement statement; + private final ObDirectLoadConnection connection; + + public ObTableDirectLoad(String schemaName, String tableName, ObDirectLoadStatement statement, ObDirectLoadConnection connection) { + Objects.requireNonNull(schemaName, "schemaName must not be null"); + Objects.requireNonNull(tableName, "tableName must not be null"); + Objects.requireNonNull(statement, "statement must not be null"); + Objects.requireNonNull(connection, "connection must not be null"); + this.tableName = tableName; + this.schemaTableName = String.format("%s.%s", schemaName, tableName); + this.statement = statement; + this.connection = connection; + } + + /** + * Begin the direct load operation. + * + * @throws ObDirectLoadException if an error occurs during the operation. + */ + public void begin() throws ObDirectLoadException { + statement.begin(); + } + + /** + * Write data into the direct load operation. + * + * @param bucket The data bucket to write. + * @throws SQLException if writing fails. + */ + public void write(ObDirectLoadBucket bucket) throws SQLException { + try { + if (bucket == null || bucket.isEmpty()) { + throw new IllegalArgumentException("Bucket must not be null or empty."); + } + LOG.info("Writing {} rows to table: {}", bucket.getRowNum(), schemaTableName); + statement.write(bucket); + LOG.info("Successfully wrote bucket data to table: {}", schemaTableName); + } catch (ObDirectLoadException e) { + LOG.error("Failed to write to table: {}", schemaTableName, e); + throw new SQLException(String.format("Failed to write to table: %s", schemaTableName), e); + } + } + + /** + * Commit the current direct load operation. + * + * @throws SQLException if commit fails. 
+ */ + public void commit() throws SQLException { + try { + LOG.info("Committing direct load for table: {}", schemaTableName); + statement.commit(); + LOG.info("Successfully committed direct load for table: {}", schemaTableName); + } catch (ObDirectLoadException e) { + LOG.error("Failed to commit for table: {}", schemaTableName, e); + throw new SQLException(String.format("Failed to commit for table: %s", schemaTableName), e); + } + } + + /** + * Close the direct load operation. + */ + public void close() { + LOG.info("Closing direct load for table: {}", schemaTableName); + statement.close(); + connection.close(); + LOG.info("Direct load closed for table: {}", schemaTableName); + } + + /** + * Gets the status from the current connection based on the traceId + */ + public ObTableLoadClientStatus getStatus() throws SQLException { + ObDirectLoadTraceId traceId = statement.getTraceId(); + // Check if traceId is null and throw an exception with a clear message + if (traceId == null) { + throw new SQLException("traceId is null."); + } + // Retrieve the status using the traceId + ObTableLoadClientStatus status = statement.getConnection().getProtocol().getHeartBeatRpc(traceId).getStatus(); + if (status == null) { + LOG.info("Direct load connect protocol heartBeatRpc for table is null: {}", schemaTableName); + throw new SQLException("status is null."); + } + // Return status if not null; otherwise, return ERROR + return status; + } + + /** + * Gets the current table + */ + public ObTable getTable() { + try { + return this.statement.getObTablePool().getControlObTable(); + } catch (ObDirectLoadException e) { + throw new RuntimeException(e); + } + } + + public String getTableName() { + if (StringUtils.isBlank(tableName)) { + throw new IllegalArgumentException("tableName is blank."); + } + return tableName; + } + + /** + * Inserts data into the direct load operation. + * + * @param bucket The data bucket containing rows to insert. 
+ * @throws SQLException if an error occurs during the insert operation. + */ + public void insert(ObDirectLoadBucket bucket) throws SQLException { + LOG.info("Inserting {} rows to table: {}", bucket.getRowNum(), schemaTableName); + + if (bucket.isEmpty()) { + LOG.warn("Parameter 'bucket' must not be empty."); + throw new IllegalArgumentException("Parameter 'bucket' must not be empty."); + } + + try { + // Perform the insertion into the load operation + statement.write(bucket); + LOG.info("Successfully inserted data into table: {}", schemaTableName); + } catch (Exception ex) { + LOG.error("Unexpected error during insert operation for table: {}", schemaTableName, ex); + throw new SQLException("Unexpected error during insert operation.", ex); + } + } +} diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java index c8630cd0af..6e4d4aab51 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java @@ -14,6 +14,17 @@ public abstract class AbstractConnHolder { protected final Configuration config; protected Connection conn; + protected String jdbcUrl; + protected String userName; + protected String password; + + protected AbstractConnHolder(Configuration config, String jdbcUrl, String userName, String password) { + this.config = config; + this.jdbcUrl = jdbcUrl; + this.userName = userName; + this.password = password; + } + public AbstractConnHolder(Configuration config) { this.config = config; } @@ -45,4 +56,6 @@ public Connection reconnect() { public abstract String getUserName(); public abstract void destroy(); + + public abstract void doCommit(); } diff --git 
a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathAbstractConnHolder.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathAbstractConnHolder.java new file mode 100644 index 0000000000..c5c6dbe0ca --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathAbstractConnHolder.java @@ -0,0 +1,61 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.ext; + +import java.sql.Connection; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class DirectPathAbstractConnHolder { + private static final Logger LOG = LoggerFactory.getLogger(AbstractConnHolder.class); + protected Configuration config; + protected String jdbcUrl; + protected String userName; + protected String password; + + protected Connection conn; + + protected DirectPathAbstractConnHolder(Configuration config, String jdbcUrl, String userName, String password) { + this.config = config; + this.jdbcUrl = jdbcUrl; + this.userName = userName; + this.password = password; + } + + public Connection reconnect() { + DBUtil.closeDBResources(null, conn); + return initConnection(); + } + + public Connection getConn() { + if (conn == null) { + return initConnection(); + } else { + try { + if (conn.isClosed()) { + return reconnect(); + } + return conn; + } catch (Exception e) { + LOG.debug("can not judge whether the hold connection is closed or not, just reuse the hold connection"); + return conn; + } + } + } + + public String getJdbcUrl() { + return jdbcUrl; + } + + public Configuration getConfig() { + return config; + } + + public void doCommit() {} + + public abstract void destroy(); + + public abstract Connection initConnection(); +} diff --git 
a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathConnHolder.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathConnHolder.java new file mode 100644 index 0000000000..352eda1ca4 --- /dev/null +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DirectPathConnHolder.java @@ -0,0 +1,115 @@ +package com.alibaba.datax.plugin.writer.oceanbasev10writer.ext; + +import java.sql.Connection; +import java.sql.SQLException; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.Config; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.common.Table; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.directPath.DirectPathConnection; + +import com.alipay.oceanbase.rpc.protocol.payload.impl.ObLoadDupActionType; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DirectPathConnHolder extends AbstractConnHolder { + private static final Logger LOG = LoggerFactory.getLogger(DirectPathConnHolder.class); + + /** + * The server side timeout. + */ + private static final long SERVER_TIMEOUT = 24L * 60 * 60 * 1000 * 1000; + + private static final ConcurrentHashMap