support general jdbc datasource #96

Merged (1 commit, Sep 11, 2022)
@@ -573,6 +573,7 @@ object Configs {
case "CLICKHOUSE" => SourceCategory.CLICKHOUSE
case "POSTGRESQL" => SourceCategory.POSTGRESQL
case "ORACLE" => SourceCategory.ORACLE
case "JDBC" => SourceCategory.JDBC
case _ => throw new IllegalArgumentException(s"${category} not support")
}
}
@@ -682,6 +683,46 @@ object Configs {
config.getString("table"),
getOrElse(config, "sentence", null)
)
case SourceCategory.JDBC =>
val partitionColumn =
if (config.hasPath("partitionColumn"))
Some(config.getString("partitionColumn"))
else None

val lowerBound =
if (config.hasPath("lowerBound"))
Some(config.getLong("lowerBound"))
else None

val upperBound =
if (config.hasPath("upperBound"))
Some(config.getLong("upperBound"))
else None

val numPartitions =
if (config.hasPath("numPartitions"))
Some(config.getLong("numPartitions"))
else None

val fetchSize =
if (config.hasPath("fetchSize"))
Some(config.getLong("fetchSize"))
else None

JdbcConfigEntry(
SourceCategory.JDBC,
config.getString("url"),
config.getString("driver"),
config.getString("user"),
config.getString("password"),
config.getString("table"),
partitionColumn,
lowerBound,
upperBound,
numPartitions,
fetchSize,
getOrElse(config, "sentence", null)
)
case SourceCategory.KAFKA =>
val intervalSeconds =
if (config.hasPath("interval.seconds")) config.getInt("interval.seconds")
Expand Down
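For orientation, here is a minimal sketch of what a JDBC source block could look like, expressed with the Typesafe Config API that backs the config.hasPath/getString/getLong calls above. The key names mirror exactly what the new branch reads; the surrounding structure of a real Exchange config file (tags, edges, sink settings, and the source-type key) is omitted, and all values are illustrative:

import com.typesafe.config.ConfigFactory

val jdbcSource = ConfigFactory.parseString(
  """
    |url = "jdbc:mysql://127.0.0.1:3306/mydb"
    |driver = "com.mysql.cj.jdbc.Driver"
    |user = "root"
    |password = "secret"
    |table = "mydb.person"
    |# the next five keys are optional; absent keys become None above
    |partitionColumn = "id"
    |lowerBound = 1
    |upperBound = 1000000
    |numPartitions = 10
    |fetchSize = 2000
    |sentence = "select id, name from person"
    |""".stripMargin)

assert(jdbcSource.hasPath("partitionColumn"))
assert(jdbcSource.getLong("numPartitions") == 10L)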
@@ -28,6 +28,7 @@ object SourceCategory extends Enumeration {
  val CLICKHOUSE = Value("CLICKHOUSE")
  val POSTGRESQL = Value("POSTGRESQL")
  val ORACLE = Value("ORACLE")
  val JDBC = Value("JDBC")

  val SOCKET = Value("SOCKET")
  val KAFKA = Value("KAFKA")
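Because SourceCategory extends Enumeration, the string passed to Value is the member's name, so lookups round-trip through the standard Enumeration API (a side note: the Configs parser above resolves categories with an explicit match, not withName):

// Enumeration ties the member to its name string, so lookups round-trip.
val jdbc: SourceCategory.Value = SourceCategory.withName("JDBC")
assert(jdbc == SourceCategory.JDBC)
assert(jdbc.toString == "JDBC")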
@@ -312,3 +313,34 @@ case class OracleConfigEntry(override val category: SourceCategory.Value,
s"Oracle source {url:$url, driver:$driver, user:$user, passwd:$passwd, table:$table, sentence:$sentence}"
}
}

/**
 * JdbcConfigEntry
 *
 * @param url JDBC database url of the form `jdbc:subprotocol:subname`.
 * @param driver fully qualified class name of the JDBC driver.
 * @param user username for the database connection.
 * @param passwd password for the database connection.
 * @param table name of the table in the external database.
 * @param partitionColumn the name of a column of integral type that will be used for partitioning.
 * @param lowerBound the minimum value of `partitionColumn` used to decide partition stride.
 * @param upperBound the maximum value of `partitionColumn` used to decide partition stride.
 * @param numPartitions the number of partitions. This, along with `lowerBound` (inclusive)
 *                      and `upperBound` (exclusive), forms partition strides for generated WHERE
 *                      clause expressions used to split `partitionColumn` evenly. When
 *                      the input is less than 1, the number is set to 1.
 * @param fetchSize the JDBC fetch size, a hint for how many rows to fetch per round trip.
 * @param sentence optional SQL statement to run over the loaded table.
 */
case class JdbcConfigEntry(override val category: SourceCategory.Value,
                           url: String,
                           driver: String,
                           user: String,
                           passwd: String,
                           table: String,
                           partitionColumn: Option[String] = None,
                           lowerBound: Option[Long] = None,
                           upperBound: Option[Long] = None,
                           numPartitions: Option[Long] = None,
                           fetchSize: Option[Long] = None,
                           override val sentence: String)
    extends ServerDataSourceConfigEntry {
  override def toString: String = {
    s"Jdbc source {url:$url, driver:$driver, user:$user, passwd:$passwd, table:$table, sentence:$sentence}"
  }
}
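A minimal sketch of constructing the entry directly (all values illustrative). Since sentence has no default it must always be supplied, while the five Option fields can be left at None:

val entry = JdbcConfigEntry(
  category = SourceCategory.JDBC,
  url = "jdbc:postgresql://127.0.0.1:5432/mydb",
  driver = "org.postgresql.Driver",
  user = "root",
  passwd = "secret",
  table = "public.person",
  sentence = null // no follow-up query; the raw table is used
)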
@@ -6,8 +6,8 @@
package com.vesoft.nebula.exchange

import org.apache.spark.sql.{DataFrame, SparkSession}
import java.io.File

import com.vesoft.exchange.Argument
import com.vesoft.exchange.common.{CheckPointHandler, ErrorHandler}
import com.vesoft.exchange.common.config.{
@@ -18,6 +18,7 @@ import com.vesoft.exchange.common.config.{
  HBaseSourceConfigEntry,
  HiveSourceConfigEntry,
  JanusGraphSourceConfigEntry,
  JdbcConfigEntry,
  KafkaSourceConfigEntry,
  MaxComputeConfigEntry,
  MySQLSourceConfigEntry,
@@ -35,14 +36,15 @@ import com.vesoft.nebula.exchange.reader.{
  HiveReader,
  JSONReader,
  JanusGraphReader,
  JdbcReader,
  KafkaReader,
  MaxcomputeReader,
  MySQLReader,
  Neo4JReader,
  ORCReader,
  OracleReader,
  ParquetReader,
  PostgreSQLReader,
  PulsarReader
}
import com.vesoft.exchange.common.processor.ReloadProcessor
@@ -321,6 +323,11 @@ object Exchange {
            val reader = new OracleReader(session, oracleConfig)
            Some(reader.read())
          }
          case SourceCategory.JDBC => {
            val jdbcConfig = config.asInstanceOf[JdbcConfigEntry]
            val reader = new JdbcReader(session, jdbcConfig)
            Some(reader.read())
          }
          case _ => {
            LOG.error(s"Data source ${config.category} not supported")
            None
@@ -11,6 +11,7 @@ import com.vesoft.exchange.common.config.{
  HBaseSourceConfigEntry,
  HiveSourceConfigEntry,
  JanusGraphSourceConfigEntry,
  JdbcConfigEntry,
  MaxComputeConfigEntry,
  MySQLSourceConfigEntry,
  Neo4JSourceConfigEntry,
@@ -317,3 +318,47 @@ class OracleReader(override val session: SparkSession, oracleConfig: OracleConfigEntry)
    df
  }
}

/**
 * Jdbc reader
 */
class JdbcReader(override val session: SparkSession, jdbcConfig: JdbcConfigEntry)
    extends ServerBaseReader(session, jdbcConfig.sentence) {
  Class.forName(jdbcConfig.driver)
  override def read(): DataFrame = {
    var dfReader = session.read
      .format("jdbc")
      .option("url", jdbcConfig.url)
      .option("dbtable", jdbcConfig.table)
      .option("user", jdbcConfig.user)
      .option("password", jdbcConfig.passwd)
      .option("driver", jdbcConfig.driver)

    if (jdbcConfig.partitionColumn.isDefined) {
      dfReader.option("partitionColumn", jdbcConfig.partitionColumn.get)
    }
    if (jdbcConfig.numPartitions.isDefined) {
      dfReader.option("numPartitions", jdbcConfig.numPartitions.get)
    }
    if (jdbcConfig.lowerBound.isDefined) {
      dfReader.option("lowerBound", jdbcConfig.lowerBound.get)
    }
    if (jdbcConfig.upperBound.isDefined) {
      dfReader.option("upperBound", jdbcConfig.upperBound.get)
    }
    if (jdbcConfig.fetchSize.isDefined) {
      dfReader.option("fetchsize", jdbcConfig.fetchSize.get)
    }

    var df = dfReader.load()

    if (jdbcConfig.sentence != null) {
      val tableName = if (jdbcConfig.table.contains(".")) {
        jdbcConfig.table.split("\\.")(1)
      } else jdbcConfig.table
      df.createOrReplaceTempView(tableName)
      df = session.sql(sentence)
    }
    df
  }
}
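Two notes on the reader above. First, the unassigned dfReader.option(...) calls still take effect: Spark's DataFrameReader is mutable, updating its option map in place and returning itself, so reassignment is not required. Second, Spark validates partitionColumn, lowerBound, upperBound, and numPartitions as a group, so a config that supplies only some of them will fail when load() runs. A minimal hand-written equivalent of a fully partitioned read, with illustrative connection values:

val df = session.read
  .format("jdbc")
  .option("url", "jdbc:mysql://127.0.0.1:3306/mydb")
  .option("driver", "com.mysql.cj.jdbc.Driver")
  .option("dbtable", "mydb.person")
  .option("user", "root")
  .option("password", "secret")
  .option("partitionColumn", "id") // the four partitioning options
  .option("lowerBound", 1L)        // must be supplied together
  .option("upperBound", 1000000L)
  .option("numPartitions", 10L)
  .option("fetchsize", 2000L)      // per-round-trip row count hint
  .load()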
@@ -6,8 +6,8 @@
package com.vesoft.nebula.exchange

import org.apache.spark.sql.{DataFrame, SparkSession}
import java.io.File

import com.vesoft.exchange.Argument
import com.vesoft.exchange.common.{CheckPointHandler, ErrorHandler}
import com.vesoft.exchange.common.config.{
@@ -18,6 +18,7 @@ import com.vesoft.exchange.common.config.{
  HBaseSourceConfigEntry,
  HiveSourceConfigEntry,
  JanusGraphSourceConfigEntry,
  JdbcConfigEntry,
  KafkaSourceConfigEntry,
  MaxComputeConfigEntry,
  MySQLSourceConfigEntry,
@@ -35,14 +36,15 @@ import com.vesoft.nebula.exchange.reader.{
  HiveReader,
  JSONReader,
  JanusGraphReader,
  JdbcReader,
  KafkaReader,
  MaxcomputeReader,
  MySQLReader,
  Neo4JReader,
  ORCReader,
  OracleReader,
  ParquetReader,
  PostgreSQLReader,
  PulsarReader
}
import com.vesoft.exchange.common.processor.ReloadProcessor
@@ -321,6 +323,11 @@ object Exchange {
            val reader = new OracleReader(session, oracleConfig)
            Some(reader.read())
          }
          case SourceCategory.JDBC => {
            val jdbcConfig = config.asInstanceOf[JdbcConfigEntry]
            val reader = new JdbcReader(session, jdbcConfig)
            Some(reader.read())
          }
          case _ => {
            LOG.error(s"Data source ${config.category} not supported")
            None
@@ -11,6 +11,7 @@ import com.vesoft.exchange.common.config.{
  HBaseSourceConfigEntry,
  HiveSourceConfigEntry,
  JanusGraphSourceConfigEntry,
  JdbcConfigEntry,
  MaxComputeConfigEntry,
  MySQLSourceConfigEntry,
  Neo4JSourceConfigEntry,
@@ -375,3 +376,47 @@ class OracleReader(override val session: SparkSession, oracleConfig: OracleConfigEntry)
    df
  }
}

/**
 * Jdbc reader
 */
class JdbcReader(override val session: SparkSession, jdbcConfig: JdbcConfigEntry)
    extends ServerBaseReader(session, jdbcConfig.sentence) {
  Class.forName(jdbcConfig.driver)
  override def read(): DataFrame = {
    var dfReader = session.read
      .format("jdbc")
      .option("url", jdbcConfig.url)
      .option("dbtable", jdbcConfig.table)
      .option("user", jdbcConfig.user)
      .option("password", jdbcConfig.passwd)
      .option("driver", jdbcConfig.driver)

    if (jdbcConfig.partitionColumn.isDefined) {
      dfReader.option("partitionColumn", jdbcConfig.partitionColumn.get)
    }
    if (jdbcConfig.numPartitions.isDefined) {
      dfReader.option("numPartitions", jdbcConfig.numPartitions.get)
    }
    if (jdbcConfig.lowerBound.isDefined) {
      dfReader.option("lowerBound", jdbcConfig.lowerBound.get)
    }
    if (jdbcConfig.upperBound.isDefined) {
      dfReader.option("upperBound", jdbcConfig.upperBound.get)
    }
    if (jdbcConfig.fetchSize.isDefined) {
      dfReader.option("fetchsize", jdbcConfig.fetchSize.get)
    }

    var df = dfReader.load()

    if (jdbcConfig.sentence != null) {
      val tableName = if (jdbcConfig.table.contains(".")) {
        jdbcConfig.table.split("\\.")(1)
      } else jdbcConfig.table
      df.createOrReplaceTempView(tableName)
      df = session.sql(sentence)
    }
    df
  }
}
@@ -7,6 +7,7 @@ package com.vesoft.nebula.exchange

import org.apache.spark.sql.{DataFrame, SparkSession}
import java.io.File

import com.vesoft.exchange.Argument
import com.vesoft.exchange.common.{CheckPointHandler, ErrorHandler}
import com.vesoft.exchange.common.config.{
@@ -17,6 +18,7 @@ import com.vesoft.exchange.common.config.{
  HBaseSourceConfigEntry,
  HiveSourceConfigEntry,
  JanusGraphSourceConfigEntry,
  JdbcConfigEntry,
  KafkaSourceConfigEntry,
  MaxComputeConfigEntry,
  MySQLSourceConfigEntry,
@@ -34,12 +36,13 @@ import com.vesoft.nebula.exchange.reader.{
  HiveReader,
  JSONReader,
  JanusGraphReader,
  JdbcReader,
  KafkaReader,
  MaxcomputeReader,
  MySQLReader,
  Neo4JReader,
  ORCReader,
  OracleReader,
  ParquetReader,
  PostgreSQLReader,
  PulsarReader
@@ -320,6 +323,11 @@ object Exchange {
            val reader = new OracleReader(session, oracleConfig)
            Some(reader.read())
          }
          case SourceCategory.JDBC => {
            val jdbcConfig = config.asInstanceOf[JdbcConfigEntry]
            val reader = new JdbcReader(session, jdbcConfig)
            Some(reader.read())
          }
          case _ => {
            LOG.error(s"Data source ${config.category} not supported")
            None