/
git_diff_xlsx.py
65 lines (54 loc) · 1.92 KB
/
git_diff_xlsx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Converts a Microsoft Excel 2007+ file into plain text
# for comparison using git diff
#
# Instructions for setup:
# 1. Place this file in a folder
# 2. Add the following lines to the global .gitconfig:
#      [diff "zip"]
#          binary = True
#          textconv = python c:/path/to/git_diff_xlsx.py
# 3. Add the following line to the repository's .gitattributes
# *.xlsx diff=zip
# 4. Now, typing [git diff] at the prompt will produce text versions
# of Excel .xlsx files
#
# Copyright William Usher 2013
# Contact: w.usher@ucl.ac.uk
#
import xlrd as xl
import sys
def parse(infile, outfile):
    """
    Convert an Excel workbook into plain text for comparison using git diff.

    The text is written to ``outfile`` in this order: the list of sheet
    names, the name of the user who last edited the file, and then, for each
    worksheet, a banner giving the sheet name and its row/column counts
    followed by one ``<cell name>: <value>`` line per non-empty cell. Cells
    are visited row by row, left to right.

    Args:
        infile: Workbook to read; passed unchanged to ``xl.open_workbook``.
        outfile: Object with a ``write`` method that receives the text.

    Returns:
        None. All output goes to ``outfile``.
    """
    # Pick the text type once so the same code runs on Python 2 and Python 3.
    try:
        to_text = unicode  # noqa: F821 -- only defined on Python 2
    except NameError:
        to_text = str

    book = xl.open_workbook(infile)

    # Everything goes through outfile (never a bare print) so that a caller
    # who passes something other than stdout still gets the complete output.
    outfile.write(to_text(book.sheet_names()) + "\n")
    # NOTE(review): user_name appears to be empty/None when the workbook
    # carries no author metadata -- guarded so the concatenation cannot fail.
    outfile.write("File last edited by " + to_text(book.user_name or "") + "\n")

    banner = "=================================\n"
    for index in range(book.nsheets):
        sheet = book.sheet_by_index(index)
        outfile.write(banner)
        outfile.write("Sheet: " + sheet.name + "[ " + str(sheet.nrows)
                      + " , " + str(sheet.ncols) + " ]\n")
        outfile.write(banner)
        for row in range(sheet.nrows):
            for col in range(sheet.ncols):
                content = sheet.cell_value(row, col)
                # Only non-empty cells are reported.
                if content != "":
                    outfile.write(" " + to_text(xl.cellname(row, col)) + ": "
                                  + to_text(content) + "\n")
        # Two line breaks after each sheet, matching what the former
        # `print "\n"` statement emitted.
        outfile.write("\n\n")
# output cell address and contents of cell
def main():
    """
    Command-line entry point: render the workbook named on the command line.

    Exactly one argument is expected, the path of the Excel file. With any
    other argument count a usage message is written to stderr and the process
    exits with status -1. Otherwise the text produced by ``parse`` is written
    to stdout encoded as UTF-8.
    """
    import codecs  # local import: only needed for the stdout wrapper below

    args = sys.argv[1:]
    if len(args) != 1:
        # Usage goes to stderr so it is never mistaken for converted content
        # by whatever is reading stdout.
        sys.stderr.write('usage: python git_diff_xlsx.py infile.xlsx\n')
        sys.exit(-1)

    # Encode explicitly: when stdout is a pipe (as it is under git textconv)
    # Python may fall back to ASCII or a locale code page and raise on
    # non-ASCII cell text. Python 3 exposes the byte stream as
    # sys.stdout.buffer; on Python 2 sys.stdout is already a byte stream.
    byte_stream = getattr(sys.stdout, 'buffer', sys.stdout)
    outfile = codecs.getwriter('utf-8')(byte_stream)
    parse(args[0], outfile)
# Run the converter only when this file is executed as a script (for example
# through git's textconv setting), not when it is imported as a module.
if __name__ == "__main__":
    main()