-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
107 lines (86 loc) · 2.48 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package main
import (
"bytes"
"errors"
"io"
"os"
"time"
"github.com/alecthomas/kong"
json "github.com/goccy/go-json"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/wolfeidau/jsontemplate"
)
var (
// version is the value bound to kong's "version" variable in main.
// It defaults to "development" (presumably overridden at build time — TODO confirm).
version = "development"
// flags holds the command-line arguments and options that kong.Parse fills in main.
flags struct {
// Version is kong's version flag; main binds the "version" variable for it.
Version kong.VersionFlag
// Source is the required positional argument naming the gzip-compressed JSON archive to read.
Source string `arg:"" required:"" help:"Source github archive file containing JSON and compressed with Gzip"`
// Destination is the required positional argument naming the parquet output file.
Destination string `arg:"" required:"" help:"Destination parquet output file"`
// EventType selects the event type to export; the enum tag only permits
// "PullRequestEvent", which is also the default.
EventType string `enum:"PullRequestEvent" default:"PullRequestEvent"`
}
)
// githubEvent is the minimal view of an event record that main decodes from
// each input line in order to inspect the event's id and type.
type githubEvent struct {
// ID is decoded from the "id" JSON key.
ID string `json:"id,omitempty"`
// EventType is decoded from the "type" JSON key; main compares it against
// "PullRequestEvent" to decide whether a line is exported.
EventType string `json:"type,omitempty"`
}
// main parses the command-line flags, reads the gzip-compressed JSON source one
// line at a time, and writes every line whose "type" is "PullRequestEvent" —
// after passing it through the pull request JSON template — to the parquet
// writer. Every error is fatal: it is logged and the process exits.
func main() {
	kong.Parse(&flags,
		kong.Vars{"version": version}, // bind a var for version
		kong.Name("arrow-gh-processor"),
	)

	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})

	rawf, err := os.Open(flags.Source)
	if err != nil {
		log.Fatal().Err(err).Msg("failed to open source file")
	}

	gzr, err := NewGzipJSONReader(rawf)
	if err != nil {
		log.Fatal().Err(err).Msg("failed to open line reader")
	}

	log.Info().Str("event_type", flags.EventType).Msg("exporting events to parquet")

	// NOTE(review): flags.Destination is not referenced anywhere in this
	// function — confirm where the output path is consumed.
	pw, err := NewParquetWriter(pullRequestArrowSchema, defaultWrtp)
	if err != nil {
		log.Fatal().Err(err).Msg("failed to open parquet writer")
	}

	ts := time.Now()

	tpl, err := jsontemplate.NewTemplate(pullRequestJSONTemplate)
	if err != nil {
		log.Fatal().Err(err).Msg("failed to compile template")
	}

	// used to extract the event id and type
	ghe := new(githubEvent)

	for {
		lineb, err := gzr.ReadLine()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			log.Fatal().Err(err).Msg("failed to read line reader")
		}

		// Unmarshal leaves fields that are absent from the input untouched, so
		// clear the previous line's values first; otherwise a line without a
		// "type" key would inherit the preceding event's type.
		*ghe = githubEvent{}

		err = json.Unmarshal(lineb, ghe)
		if err != nil {
			log.Fatal().Err(err).Msg("failed to un marshal line")
		}

		// The arrow schema and JSON template used here are the pull request
		// ones, so only pull request events are exported.
		if ghe.EventType != "PullRequestEvent" {
			continue
		}

		// A fresh buffer per record: whether the writer keeps a reference to
		// the bytes it is handed is not visible from here.
		buf := new(bytes.Buffer)

		if _, err := tpl.Execute(buf, lineb); err != nil {
			log.Fatal().Err(err).Msg("failed to execute template")
		}

		if err := pw.Write(buf.Bytes()); err != nil {
			log.Fatal().Err(err).Msg("failed to write parquet record")
		}
	}

	err = gzr.Close()
	if err != nil {
		log.Fatal().Err(err).Msg("failed to close line reader")
	}

	err = pw.Close()
	if err != nil {
		log.Fatal().Err(err).Msg("failed to close parquet writer")
	}

	log.Info().Int64("data_length", gzr.BytesRead()).Int("line_count", gzr.LineCount()).Int("record_count", pw.RecordCount()).Dur("taken", time.Since(ts)).Msg("output")
}