/
linkedin2016.go
330 lines (290 loc) · 8.23 KB
/
linkedin2016.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
package main
import (
"bufio"
"context"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
)
// LinkedinData is the document shape this importer writes to, and reads back
// from, the "dumps" collection of the "steamer" database.
type LinkedinData struct {
	// Id is the MongoDB document id; the importers assign a fresh ObjectID
	// to every document they insert.
	Id primitive.ObjectID `json:"id" bson:"_id,omitempty"`

	// MemberID is the numeric member id parsed from the dump; it stays 0 for
	// records created from an email:hash line with no known member.
	MemberID int `bson:"memberid"`

	// Email is the address as found in the dump, and Liame is the same
	// address with its runes reversed (see Reverse).
	Email string `bson:"email"`
	Liame string `bson:"liame"`

	// PasswordHash is the hash column of the data files. Password is never
	// populated by this file.
	PasswordHash string `bson:"passwordhash"`
	Password     string `bson:"password"`

	// Breach names the source breach; this importer always writes
	// "LinkedIn2016".
	Breach string `bson:"breach"`
}
// main imports the LinkedIn 2016 dump into MongoDB in two passes.
//
// Pass one streams every line of the MySQL dump "1.sql.txt" to a pool of
// importSQLLine workers. Pass two streams every line of each file matching
// "datafiles/*.txt" to a pool of importLine workers. Each pass closes its work
// channel once the input is exhausted and then waits for every worker to
// signal completion before moving on.
//
// Failing to connect, to open the SQL dump, or to glob the data files is
// fatal (exit status 1). A data file that cannot be opened or fully scanned is
// reported on stderr and skipped.
func main() {
	// Connect to mongodb
	ctx := context.Background()
	clientOptions := options.Client().ApplyURI("mongodb://localhost").SetTimeout(48 * time.Hour)
	mdb, err := mongo.Connect(ctx, clientOptions)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Could not connect to MongoDB: %v\r\n", err)
		os.Exit(1)
	}
	// Registered only after the error check so Disconnect is never scheduled
	// on a client that failed to connect.
	defer mdb.Disconnect(ctx)

	threads := 15

	// Pass one: the MySQL dump.
	sqlthreader := make(chan string, threads*20) // buffered to 20 * thread size
	sqldoner := make(chan bool, threads)
	for i := 0; i < threads; i++ {
		go importSQLLine(sqlthreader, mdb, sqldoner, ctx)
	}

	fmt.Println("Importing SQL")
	sqlfile, err := os.Open("1.sql.txt")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error opening file %v\r\n", err)
		os.Exit(1)
	}
	if err := sendLines(sqlfile, sqlthreader); err != nil {
		// Best effort: report the short read but let the workers flush
		// whatever was already queued.
		fmt.Fprintf(os.Stderr, "Error scanning file 1.sql.txt: %v\r\n", err)
	}
	sqlfile.Close()
	close(sqlthreader)

	// wait until all threads signal done
	for i := 0; i < threads; i++ {
		<-sqldoner
		fmt.Println("SQL Thread signaled done!")
	}
	fmt.Println("SQL imported")

	// Pass two: the member/email hash files.
	matches, err := filepath.Glob("datafiles/*.txt")
	if err != nil {
		fmt.Fprintf(os.Stderr, "error globbing for files: %v\r\n", err)
		os.Exit(1)
	}

	threader := make(chan string, threads*20) // buffered to 20 * thread size
	doner := make(chan bool, threads)
	for i := 0; i < threads; i++ {
		go importLine(threader, mdb, doner, ctx)
	}

	for _, f := range matches {
		fmt.Printf("Parsing file %v...\r\n", f)
		file, err := os.Open(f)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error opening file %v\r\n", f)
			continue
		}
		err = sendLines(file, threader)
		// Close right away instead of deferring: a defer inside this loop
		// would keep every data file open until main returns.
		file.Close()
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error scanning file %v\r\n", f)
			continue
		}
	}

	// close the threader channel
	close(threader)

	// wait until all threads signal done
	for i := 0; i < threads; i++ {
		<-doner
		fmt.Println("Thread signaled done!")
	}
}

// sendLines scans f line by line and sends each line on out. It returns the
// scanner's error, if any; the caller remains responsible for closing f.
func sendLines(f *os.File, out chan<- string) error {
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		out <- scanner.Text()
	}
	return scanner.Err()
}
// importSQLLine is a worker that parses lines of the MySQL dump received on
// threader and bulk-inserts them into the "dumps" collection of the "steamer"
// database. It sends true on doner once threader is closed and the last batch
// has been written.
//
// The member id is taken from just after the first single quote up to one
// byte before the first comma, and the email from three bytes after the first
// comma up to the last single quote. That matches rows shaped like
// ('123', 'user@example.com') — NOTE(review): format assumed from the slicing
// offsets; confirm against the dump. Lines that do not fit are printed and
// skipped.
//
// Documents are inserted unordered in batches of up to 1000. Insert errors are
// printed and do not stop the worker.
func importSQLLine(threader <-chan string, mgo *mongo.Client, doner chan<- bool, ctx context.Context) {
	const batchSize = 1000

	c := mgo.Database("steamer").Collection("dumps")
	opts := options.InsertMany().SetOrdered(false)
	buffer := make([]interface{}, 0, batchSize)

	// flush writes the buffered documents, if any, and empties the buffer.
	// The empty check matters because InsertMany rejects an empty slice.
	flush := func() {
		if len(buffer) == 0 {
			return
		}
		if _, err := c.InsertMany(ctx, buffer, opts); err != nil {
			fmt.Println("error inserting into db", err)
		}
		buffer = buffer[:0]
	}

	for text := range threader {
		firstQuote := strings.Index(text, "'")
		comma := strings.Index(text, ",")
		lastQuote := strings.LastIndex(text, "'")

		// Check that the slices below will be valid and non-empty.
		if firstQuote+1 >= comma-1 {
			fmt.Println("invalid", text)
			continue
		}
		if comma+3 >= lastQuote {
			fmt.Println("invalid2", text)
			continue
		}

		memberidstr := text[firstQuote+1 : comma-1]
		memberid, err := strconv.Atoi(memberidstr)
		if err != nil {
			fmt.Println("Error converting MemberID ", memberidstr)
			continue
		}
		email := text[comma+3 : lastQuote]

		buffer = append(buffer, LinkedinData{
			Id:       primitive.NewObjectID(),
			MemberID: memberid,
			Email:    email,
			Liame:    Reverse(email),
			Breach:   "LinkedIn2016",
		})
		// Batch on documents actually buffered, not on lines seen, so
		// invalid lines no longer shrink or empty a batch.
		if len(buffer) >= batchSize {
			flush()
		}
	}

	// final bulk insert
	flush()
	doner <- true
}
// importLine is a worker that merges "key:hash" lines received on threader
// into the "dumps" collection of the "steamer" database, where key is either
// a numeric member id or an email address. It sends true on doner once
// threader is closed and every line has been handled.
func importLine(threader <-chan string, client *mongo.Client, doner chan<- bool, ctx context.Context) {
	c := client.Database("steamer").Collection("dumps")
	for text := range threader {
		importHashLine(ctx, c, text)
	}
	doner <- true
}

// importHashLine applies a single "key:hash" line to collection c. Database
// and parse errors are printed and cause the line to be skipped.
func importHashLine(ctx context.Context, c *mongo.Collection, text string) {
	// Split the line into x:y
	data := strings.Split(text, ":")
	if len(data) < 2 {
		// Without this guard data[1] would panic and kill the whole import.
		fmt.Println("invalid", text)
		return
	}
	key, hash := data[0], data[1]

	// remove bogus data
	if key == "null" || hash == "xxx" {
		return
	}

	// A key without "@" is treated as a member id, anything else as an email.
	if !strings.Contains(key, "@") {
		memberid, err := strconv.Atoi(key)
		if err != nil {
			fmt.Println("error parsing memberid", data)
			return
		}
		results, err := findLinkedinEntries(ctx, c, bson.M{"breach": "LinkedIn2016", "memberid": memberid})
		if err != nil {
			fmt.Println("error finding with memberid: ", memberid, err)
			return
		}
		switch {
		case len(results) < 1:
			// no results, a memberid we have no email for
		case len(results) > 1:
			// Several records already exist for this member: add another
			// one carrying the first record's email and the new hash.
			insertLinkedinEntry(ctx, c, memberid, results[0].Email, hash)
		case results[0].PasswordHash == hash:
			// the single existing record is already up to date
		case results[0].PasswordHash == "":
			// fill in the hash on the record created from the SQL dump
			setPasswordHash(ctx, c, results[0].Id, hash)
		default:
			// a hash not seen before for this member: keep both records
			insertLinkedinEntry(ctx, c, memberid, results[0].Email, hash)
		}
		return
	}

	// email:hash format
	results, err := findLinkedinEntries(ctx, c, bson.M{"breach": "LinkedIn2016", "email": key})
	if err != nil {
		fmt.Println("error finding with email: ", key, err)
		return
	}
	switch len(results) {
	case 0:
		// unknown email: create a record with no member id
		insertLinkedinEntry(ctx, c, 0, key, hash)
	case 1:
		if results[0].PasswordHash != hash {
			setPasswordHash(ctx, c, results[0].Id, hash)
		}
	default:
		// Several records exist: add one more unless the hash is already
		// stored on any of them.
		for _, entry := range results {
			if entry.PasswordHash == hash {
				return
			}
		}
		insertLinkedinEntry(ctx, c, results[0].MemberID, key, hash)
	}
}

// findLinkedinEntries returns every document in c matching filter. The Find
// error is checked before the cursor is used, and the decode error from
// cursor.All is returned rather than dropped.
func findLinkedinEntries(ctx context.Context, c *mongo.Collection, filter bson.M) ([]LinkedinData, error) {
	cursor, err := c.Find(ctx, filter)
	if err != nil {
		return nil, err
	}
	results := []LinkedinData{}
	if err := cursor.All(ctx, &results); err != nil {
		return nil, err
	}
	return results, nil
}

// insertLinkedinEntry inserts a new LinkedIn2016 record with the given member
// id, email and password hash, printing any insert error.
func insertLinkedinEntry(ctx context.Context, c *mongo.Collection, memberid int, email, hash string) {
	entry := LinkedinData{
		Id:           primitive.NewObjectID(),
		MemberID:     memberid,
		Email:        email,
		Liame:        Reverse(email),
		Breach:       "LinkedIn2016",
		PasswordHash: hash,
	}
	if _, err := c.InsertOne(ctx, entry); err != nil {
		fmt.Println("error inserting into db", err)
	}
}

// setPasswordHash sets passwordhash on the document with the given id,
// printing any update error.
func setPasswordHash(ctx context.Context, c *mongo.Collection, id primitive.ObjectID, hash string) {
	update := bson.M{"$set": bson.M{"passwordhash": hash}}
	if _, err := c.UpdateOne(ctx, bson.M{"_id": id}, update); err != nil {
		fmt.Println("error updating db", err)
	}
}
// Reverse returns s with its runes in the opposite order. The string is
// decoded to runes first, so multi-byte characters are moved as a unit rather
// than having their bytes flipped.
func Reverse(s string) string {
	src := []rune(s)
	last := len(src) - 1
	out := make([]rune, len(src))
	for i, r := range src {
		// rune i of the input lands at the mirrored position of the output
		out[last-i] = r
	}
	return string(out)
}