forked from pinterest/mysql_utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_recent_checksums.py
executable file
·140 lines (120 loc) · 4.91 KB
/
get_recent_checksums.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python
import argparse
import socket
import sys
from lib import host_utils
from lib import mysql_lib
def main():
parser = argparse.ArgumentParser(description='MySQL checksum interface')
parser.add_argument('-i',
'--instance',
help='Defaults to localhost:3306',
default=''.join((socket.getfqdn(), ':3306')),
required=True)
parser.add_argument('-d',
'--db',
help="Restrict results to a single named db",
default=False)
parser.add_argument('-t',
'--table',
help='Restrict results to a single table',
default=False)
args = parser.parse_args()
instance = host_utils.HostAddr(args.instance)
checksums = get_checksums(instance, args.db)
if not checksums:
print "No data found!"
sys.exit(1)
header = ''
for checksum in checksums:
# Humans don't care about false positives for diffs
if (checksum['checksum_status'] == 'ROW_DIFFS_FOUND' and
checksum['rows_checked'] == 'YES' and
checksum['row_diffs'] == 0):
checksum['checksum_status'] = 'GOOD'
# build up and print a reasonable header.
if header == '':
m_spc = ' ' * 26
r_spc = ' ' * 25
tbl_spc = ' ' * 35
header = ("Master{m_spc}Replica{r_spc}Date"
"\t\t\tDatabase\tTable{tbl_spc}Row Count\t"
"Diff Count\tStatus").format(m_spc=m_spc,
r_spc=r_spc,
tbl_spc=tbl_spc)
print header
print '=' * 190
tbl_padding = ' ' * (40 - len(checksum['tbl']))
master_padding = ' ' * (32 - len(checksum['master_instance']))
slave_padding = ' ' * (32 - len(checksum['instance']))
if args.table is False or args.table == checksum['tbl']:
print ("{m}{m_spc}{r}{s_spc}{dt}\t{db}\t{tbl}{t_spc}{count}\t\t"
"{row_diffs}\t\t{status}"
"".format(m=checksum['master_instance'],
r=checksum['instance'],
db=checksum['db'],
tbl=checksum['tbl'],
t_spc=tbl_padding,
m_spc=master_padding,
s_spc=slave_padding,
status=checksum['checksum_status'],
count=checksum['row_count'],
row_diffs=checksum['row_diffs'],
dt=checksum['reported_at']))
def get_checksums(instance, db=False):
""" Get recent mysql replication checksums
Args:
instance - a hostaddr object for what server to pull results for
db - a string of a data to for which to restrict results
Returns:
A list of dicts from a select * on the relevant rows
"""
vars_for_query = dict()
vars_for_query['instance'] = instance
zk = host_utils.MysqlZookeeper()
host_shard_map = zk.get_host_shard_map()
if db is False:
cnt = 0
shard_param_set = set()
for entry in host_shard_map[instance.__str__()]:
key = ''.join(('shard', str(cnt)))
vars_for_query[key] = entry
shard_param_set.add(key)
cnt += 1
shard_param = ''.join(('%(',
')s,%('.join(shard_param_set),
')s'))
else:
shard_param = '%(shard1)s'
vars_for_query['shard1'] = db
# connect to the instance we care about and get some data.
conn = mysql_lib.connect_mysql(instance, 'scriptrw')
# We only care about the most recent checksum
cursor = conn.cursor()
sql = ("SELECT detail.master_instance, "
" detail.instance, "
" detail.db, "
" detail.tbl, "
" detail.reported_at, "
" detail.checksum_status, "
" detail.rows_checked, "
" detail.row_count, "
" detail.row_diffs "
"FROM "
" (SELECT master_instance,"
" instance, "
" db, "
" tbl, "
" MAX(reported_at) AS reported_at "
" FROM test.checksum_detail "
" WHERE master_instance=%(instance)s "
" AND db IN ({sp}) "
" GROUP BY 1,2,3,4 "
" ) AS most_recent "
"JOIN test.checksum_detail AS detail "
"USING(master_instance, instance, db, tbl, reported_at) ").format(sp=shard_param)
cursor.execute(sql, vars_for_query)
checksums = cursor.fetchall()
return checksums
if __name__ == "__main__":
main()