-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #24 from usedatabrew/dat-371
Dat 371
- Loading branch information
Showing
9 changed files
with
418 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
service: | ||
id: 32333 | ||
pipeline_id: 32333 | ||
enable_etcd_registry: true | ||
etcd: | ||
host: http://etcd-server.internal:2379 | ||
reload_on_restart: false | ||
source: | ||
driver: mysql_cdc | ||
config: | ||
host: 127.0.0.1 | ||
port: 3306 | ||
database: db | ||
user: root | ||
password: test | ||
flavor: mysql | ||
stream_snapshot: true | ||
tables: | ||
- lights | ||
stream_schema: | ||
- stream: flights | ||
columns: | ||
- name: id | ||
databrewType: Int32 | ||
nativeConnectorType: integer | ||
pk: true | ||
nullable: false | ||
- name: created_at | ||
databrewType: String | ||
nativeConnectorType: varchar | ||
pk: false | ||
nullable: true | ||
- name: payment_method | ||
databrewType: String | ||
nativeConnectorType: character varying | ||
pk: false | ||
nullable: false | ||
- name: duration | ||
databrewType: Int32 | ||
nativeConnectorType: integer | ||
pk: false | ||
nullable: false | ||
|
||
sink: | ||
driver: stdout | ||
config: { } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
package mysql_cdc | ||
|
||
type Config struct { | ||
Host string `json:"host" yaml:"host"` | ||
Port uint16 `json:"port" yaml:"port"` | ||
Database string `json:"database" yaml:"database"` | ||
User string `json:"user" yaml:"user"` | ||
Password string `json:"password" yaml:"password"` | ||
Flavor string `json:"flavor" yaml:"flavor"` | ||
StreamSnapshot bool `json:"stream_snapshot" yaml:"stream_snapshot"` | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
package mysql_cdc | ||
|
||
import ( | ||
"encoding/json" | ||
"strings" | ||
"time" | ||
|
||
"github.com/go-mysql-org/go-mysql/mysql" | ||
"github.com/go-mysql-org/go-mysql/schema" | ||
) | ||
|
||
const mysqlDateFormat = "2006-01-02" | ||
|
||
func convertData(col schema.TableColumn, value interface{}) interface{} { | ||
switch col.Type { | ||
case schema.TYPE_ENUM: | ||
switch value := value.(type) { | ||
case int64: | ||
// for binlog, ENUM may be int64, but for dump, enum is string | ||
eNum := value - 1 | ||
if eNum < 0 || eNum >= int64(len(col.EnumValues)) { | ||
return "" | ||
} | ||
|
||
return col.EnumValues[eNum] | ||
} | ||
case schema.TYPE_SET: | ||
switch value := value.(type) { | ||
case int64: | ||
// for binlog, SET may be int64, but for dump, SET is string | ||
bitmask := value | ||
sets := make([]string, 0, len(col.SetValues)) | ||
for i, s := range col.SetValues { | ||
if bitmask&int64(1<<uint(i)) > 0 { | ||
sets = append(sets, s) | ||
} | ||
} | ||
return strings.Join(sets, ",") | ||
} | ||
case schema.TYPE_BIT: | ||
switch value := value.(type) { | ||
case string: | ||
// for binlog, BIT is int64, but for dump, BIT is string | ||
// for dump 0x01 is for 1, \0 is for 0 | ||
if value == "\x01" { | ||
return int64(1) | ||
} | ||
|
||
return int64(0) | ||
} | ||
case schema.TYPE_STRING: | ||
switch value := value.(type) { | ||
case []byte: | ||
return string(value[:]) | ||
} | ||
case schema.TYPE_JSON: | ||
var f interface{} | ||
var err error | ||
switch v := value.(type) { | ||
case string: | ||
err = json.Unmarshal([]byte(v), &f) | ||
case []byte: | ||
err = json.Unmarshal(v, &f) | ||
} | ||
if err == nil && f != nil { | ||
return f | ||
} | ||
case schema.TYPE_DATETIME, schema.TYPE_TIMESTAMP: | ||
switch v := value.(type) { | ||
case string: | ||
vt, err := time.ParseInLocation(mysql.TimeFormat, string(v), time.Local) | ||
if err != nil || vt.IsZero() { // failed to parse date or zero date | ||
return nil | ||
} | ||
return vt.Format(time.RFC3339) | ||
} | ||
case schema.TYPE_DATE: | ||
switch v := value.(type) { | ||
case string: | ||
vt, err := time.Parse(mysqlDateFormat, string(v)) | ||
if err != nil || vt.IsZero() { // failed to parse date or zero date | ||
return nil | ||
} | ||
return vt.Format(mysqlDateFormat) | ||
} | ||
} | ||
|
||
return value | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
package mysql_cdc | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"fmt" | ||
|
||
"github.com/apache/arrow/go/v14/arrow" | ||
"github.com/apache/arrow/go/v14/arrow/array" | ||
"github.com/apache/arrow/go/v14/arrow/memory" | ||
"github.com/cloudquery/plugin-sdk/v4/scalar" | ||
"github.com/go-mysql-org/go-mysql/canal" | ||
"github.com/usedatabrew/blink/internal/helper" | ||
"github.com/usedatabrew/blink/internal/schema" | ||
"github.com/usedatabrew/blink/internal/sources" | ||
"github.com/usedatabrew/message" | ||
) | ||
|
||
type DataTableSchema struct { | ||
TableName string | ||
Schema *arrow.Schema | ||
} | ||
|
||
type ProcessEventParams struct { | ||
initValue, incrementValue int | ||
} | ||
|
||
type SourcePlugin struct { | ||
config Config | ||
inputSchema map[string]schema.StreamSchema | ||
outputSchema map[string]DataTableSchema | ||
messagesStream chan sources.MessageEvent | ||
canal *canal.Canal | ||
canal.DummyEventHandler | ||
} | ||
|
||
func NewMysqlSourcePlugin(config Config, sCh []schema.StreamSchema) sources.DataSource { | ||
iSchema := make(map[string]schema.StreamSchema) | ||
|
||
for _, stream := range sCh { | ||
iSchema[stream.StreamName] = stream | ||
} | ||
|
||
instance := &SourcePlugin{ | ||
config: config, | ||
inputSchema: iSchema, | ||
messagesStream: make(chan sources.MessageEvent), | ||
} | ||
|
||
instance.buildOutputSchema() | ||
|
||
return instance | ||
} | ||
|
||
func (p *SourcePlugin) Connect(ctx context.Context) error { | ||
cfg := canal.NewDefaultConfig() | ||
cfg.Addr = fmt.Sprintf("%s:%d", p.config.Host, p.config.Port) | ||
cfg.User = p.config.User | ||
cfg.Password = p.config.Password | ||
cfg.Flavor = p.config.Flavor | ||
|
||
cfg.Dump.TableDB = p.config.Database | ||
|
||
var tables []string | ||
|
||
for _, table := range p.inputSchema { | ||
tables = append(tables, table.StreamName) | ||
} | ||
|
||
cfg.Dump.Tables = tables | ||
|
||
c, err := canal.NewCanal(cfg) | ||
|
||
if err != nil { | ||
return err | ||
} | ||
|
||
p.canal = c | ||
|
||
return nil | ||
} | ||
|
||
func (p *SourcePlugin) Start() { | ||
p.canal.SetEventHandler(p) | ||
|
||
if p.config.StreamSnapshot { | ||
p.canal.Run() | ||
} else { | ||
coords, _ := p.canal.GetMasterPos() | ||
|
||
p.canal.RunFrom(coords) | ||
} | ||
} | ||
|
||
func (p *SourcePlugin) Stop() { | ||
p.canal.Close() | ||
} | ||
|
||
func (p *SourcePlugin) Events() chan sources.MessageEvent { | ||
return p.messagesStream | ||
} | ||
|
||
func (p *SourcePlugin) OnRow(e *canal.RowsEvent) error { | ||
if p.config.Database != e.Table.Schema { | ||
return nil | ||
} | ||
|
||
if _, ok := p.inputSchema[e.Table.Name]; !ok { | ||
return nil | ||
} | ||
|
||
switch e.Action { | ||
case canal.InsertAction: | ||
return p.processEvent(e, ProcessEventParams{initValue: 0, incrementValue: 1}) | ||
case canal.DeleteAction: | ||
return p.processEvent(e, ProcessEventParams{initValue: 0, incrementValue: 1}) | ||
case canal.UpdateAction: | ||
return p.processEvent(e, ProcessEventParams{initValue: 1, incrementValue: 2}) | ||
default: | ||
return errors.New("invalid rows action") | ||
} | ||
} | ||
|
||
func (p *SourcePlugin) processEvent(e *canal.RowsEvent, params ProcessEventParams) error { | ||
inputSchema := p.inputSchema[e.Table.Name] | ||
outputSchema := p.outputSchema[e.Table.Name] | ||
|
||
builder := array.NewRecordBuilder(memory.DefaultAllocator, outputSchema.Schema) | ||
|
||
for i := params.initValue; i < len(e.Rows); i += params.incrementValue { | ||
for i, v := range e.Rows[i] { | ||
outputIndex := -1 | ||
|
||
for inputSchemaIndex, inputSchemaColumn := range inputSchema.Columns { | ||
if e.Table.Columns[i].Name == inputSchemaColumn.Name { | ||
outputIndex = inputSchemaIndex | ||
} | ||
} | ||
|
||
if outputIndex == -1 { | ||
continue | ||
} | ||
|
||
s := scalar.NewScalar(outputSchema.Schema.Field(outputIndex).Type) | ||
|
||
if err := s.Set(convertData(e.Table.Columns[i], v)); err != nil { | ||
panic(err) | ||
} | ||
|
||
scalar.AppendToBuilder(builder.Field(outputIndex), s) | ||
} | ||
} | ||
|
||
bytes, _ := builder.NewRecord().MarshalJSON() | ||
m := message.NewMessage(message.Event(e.Action), e.Table.Name, bytes) | ||
|
||
p.messagesStream <- sources.MessageEvent{ | ||
Message: m, | ||
Err: nil, | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (p *SourcePlugin) buildOutputSchema() { | ||
outputSchema := make(map[string]DataTableSchema) | ||
|
||
for _, stream := range p.inputSchema { | ||
tSch := DataTableSchema{ | ||
TableName: stream.StreamName, | ||
} | ||
|
||
var arrowSchemaFields []arrow.Field | ||
|
||
for _, schemaCol := range stream.Columns { | ||
arrowSchemaFields = append(arrowSchemaFields, arrow.Field{ | ||
Name: schemaCol.Name, | ||
Type: helper.MapPlainTypeToArrow(schemaCol.DatabrewType), | ||
Nullable: schemaCol.Nullable, | ||
Metadata: arrow.Metadata{}, | ||
}) | ||
} | ||
|
||
tSch.Schema = arrow.NewSchema(arrowSchemaFields, nil) | ||
outputSchema[stream.StreamName] = tSch | ||
} | ||
|
||
p.outputSchema = outputSchema | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.