-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest_e2e_04_CSV_file_encoding.py
109 lines (83 loc) · 10.3 KB
/
test_e2e_04_CSV_file_encoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os.path
import sys
import textwrap
from src.csvdiff3 import csvdiff
# Directory holding this module's CSV fixtures; every test below joins it onto
# the `path_to_tests_dir` fixture value to build the input file paths.
TEST_DATA_DIR = 'data/e2e_04_file_encoding'
def test_file_encoding_utf8(path_to_tests_dir, capfd):
    """Run csvdiff on the ``*_UTF-8.csv`` fixture pair and check the full report.

    The command line is ``csvdiff.py <left> <right> -ac``; no ``-e`` option is
    given, so this presumably exercises the tool's default encoding (confirm
    against csvdiff's argument parser).

    Args:
        path_to_tests_dir: pytest fixture giving the root directory of the tests.
        capfd: pytest fixture used to capture what ``csvdiff.main()`` writes to
            stdout/stderr.
    """
    fixture_dir = os.path.join(path_to_tests_dir, TEST_DATA_DIR)
    lhs_csv = os.path.join(fixture_dir, 'left_UTF-8.csv')
    rhs_csv = os.path.join(fixture_dir, 'right_UTF-8.csv')

    # csvdiff.main() reads its arguments from sys.argv. Swap the fake command
    # line in only for the duration of the call so it cannot leak into other
    # tests (or into pytest itself) running later in the same process.
    saved_argv = sys.argv
    sys.argv = ['csvdiff.py', lhs_csv, rhs_csv, '-ac']
    try:
        csvdiff.main()
    finally:
        sys.argv = saved_argv

    out, err = capfd.readouterr()

    # Nothing may be written to stderr, and stdout must match the report exactly.
    assert err == ''
    assert out == textwrap.dedent('''
============ Report ============
● All
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
left_UTF-8.csv right_UTF-8.csv Column indices with difference
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
2 ['1', '値1−1', '1001', '東京', 'ウナ・セラ・ディ東京'] ! 2 ['1', '値1−1', '1001', '東京', 'ウナ・セラ・デイ東京'] @ [4]
3 ['2', '値1−2', '1002', '大阪', '西長堀アパート'] 3 ['2', '値1−2', '1002', '大阪', '西長堀アパート']
4 ['3', '値1−3', '1003', '横浜', '伊勢佐木町ブルースでも歌って'] ! 4 ['3', '値1−3', '1003', '横浜', '伊勢佐木町ブルーズでも歌って'] @ [4]
5 ['4', '値i−4', '1004', '北海道', '羊蹄山の麓🌱'] ! 5 ['4', '値1−4', '1004', '北海道', '羊蹄山の麓🌱'] @ [1]
6 ['5', '値1−5', '1005', '三重', '三重県伊賀市忍者村'] ! 6 ['5', '値1−5', '1o05', '二重', '三重県伊賀市忍者村'] @ [2, 3]
7 ['6', '値1−6', '1006', '新潟', '星峠の棚田🌙'] ! 7 ['6', '値1−6', '1006', '新烏', '星峠の棚田🌟'] @ [3, 4]
8 ['7', '値1−7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町'] 8 ['7', '値1−7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町']
● Count & Row number
same lines : 2
left side only (<): 0 :-- Row Numbers -->: []
right side only (>): 0 :-- Row Numbers -->: []
with differences (!): 5 :-- Row Number Pairs -->: [(2, 2), (4, 4), (5, 5), (6, 6), (7, 7)]
''')
def test_file_encoding_shift_jis(path_to_tests_dir, capfd):
    """Run csvdiff on the ``*_Shift_JIS.csv`` fixture pair with ``-e Shift_JIS``.

    The command line is ``csvdiff.py <left> <right> -ac -e Shift_JIS`` and the
    complete stdout report is compared against the expected text.

    Args:
        path_to_tests_dir: pytest fixture giving the root directory of the tests.
        capfd: pytest fixture used to capture what ``csvdiff.main()`` writes to
            stdout/stderr.
    """
    fixture_dir = os.path.join(path_to_tests_dir, TEST_DATA_DIR)
    lhs_csv = os.path.join(fixture_dir, 'left_Shift_JIS.csv')
    rhs_csv = os.path.join(fixture_dir, 'right_Shift_JIS.csv')

    # csvdiff.main() reads its arguments from sys.argv. Swap the fake command
    # line in only for the duration of the call so it cannot leak into other
    # tests (or into pytest itself) running later in the same process.
    saved_argv = sys.argv
    # The option and its value are separate argv entries, exactly as a shell
    # tokenizes `-e Shift_JIS`; a single '-e Shift_JIS' entry embeds a space in
    # one token, which an unquoted command line never produces.
    sys.argv = ['csvdiff.py', lhs_csv, rhs_csv, '-ac', '-e', 'Shift_JIS']
    try:
        csvdiff.main()
    finally:
        sys.argv = saved_argv

    out, err = capfd.readouterr()

    # Nothing may be written to stderr, and stdout must match the report exactly.
    assert err == ''
    assert out == textwrap.dedent('''
============ Report ============
● All
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
left_Shift_JIS.csv right_Shift_JIS.csv Column indices with difference
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
2 ['1', '値1-1', '1001', '東京', 'ウナ・セラ・ディ東京'] ! 2 ['1', '値1-1', '1001', '東京', 'ウナ・セラ・デイ東京'] @ [4]
3 ['2', '値1-2', '1002', '大阪', '西長堀アパート'] 3 ['2', '値1-2', '1002', '大阪', '西長堀アパート']
4 ['3', '値1-3', '1003', '横浜', '伊勢佐木町ブルースでも歌って'] ! 4 ['3', '値1-3', '1003', '横浜', '伊勢佐木町ブルーズでも歌って'] @ [4]
5 ['4', '値i-4', '1004', '北海道', '羊蹄山の麓'] ! 5 ['4', '値1-4', '1004', '北海道', '羊蹄山の麓'] @ [1]
6 ['5', '値1-5', '1005', '三重', '三重県伊賀市忍者村'] ! 6 ['5', '値1-5', '1o05', '二重', '三重県伊賀市忍者村'] @ [2, 3]
7 ['6', '値1-6', '1006', '新潟', '星峠の棚田'] ! 7 ['6', '値1-6', '1006', '新烏', '星峠の棚田'] @ [3]
8 ['7', '値1-7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町'] 8 ['7', '値1-7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町']
● Count & Row number
same lines : 2
left side only (<): 0 :-- Row Numbers -->: []
right side only (>): 0 :-- Row Numbers -->: []
with differences (!): 5 :-- Row Number Pairs -->: [(2, 2), (4, 4), (5, 5), (6, 6), (7, 7)]
''')
def test_file_encoding_euc_jp(path_to_tests_dir, capfd):
    """Run csvdiff on the ``*_EUC-JP.csv`` fixture pair with ``-e EUC-JP``.

    The command line is ``csvdiff.py <left> <right> -ac -e EUC-JP`` and the
    complete stdout report is compared against the expected text.

    Args:
        path_to_tests_dir: pytest fixture giving the root directory of the tests.
        capfd: pytest fixture used to capture what ``csvdiff.main()`` writes to
            stdout/stderr.
    """
    fixture_dir = os.path.join(path_to_tests_dir, TEST_DATA_DIR)
    lhs_csv = os.path.join(fixture_dir, 'left_EUC-JP.csv')
    rhs_csv = os.path.join(fixture_dir, 'right_EUC-JP.csv')

    # csvdiff.main() reads its arguments from sys.argv. Swap the fake command
    # line in only for the duration of the call so it cannot leak into other
    # tests (or into pytest itself) running later in the same process.
    saved_argv = sys.argv
    # The option and its value are separate argv entries, exactly as a shell
    # tokenizes `-e EUC-JP`; a single '-e EUC-JP' entry embeds a space in one
    # token, which an unquoted command line never produces.
    sys.argv = ['csvdiff.py', lhs_csv, rhs_csv, '-ac', '-e', 'EUC-JP']
    try:
        csvdiff.main()
    finally:
        sys.argv = saved_argv

    out, err = capfd.readouterr()

    # Nothing may be written to stderr, and stdout must match the report exactly.
    assert err == ''
    assert out == textwrap.dedent('''
============ Report ============
● All
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
left_EUC-JP.csv right_EUC-JP.csv Column indices with difference
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
2 ['1', '値1-1', '1001', '東京', 'ウナ・セラ・ディ東京'] ! 2 ['1', '値1-1', '1001', '東京', 'ウナ・セラ・デイ東京'] @ [4]
3 ['2', '値1-2', '1002', '大阪', '西長堀アパート'] 3 ['2', '値1-2', '1002', '大阪', '西長堀アパート']
4 ['3', '値1-3', '1003', '横浜', '伊勢佐木町ブルースでも歌って'] ! 4 ['3', '値1-3', '1003', '横浜', '伊勢佐木町ブルーズでも歌って'] @ [4]
5 ['4', '値i-4', '1004', '北海道', '羊蹄山の麓'] ! 5 ['4', '値1-4', '1004', '北海道', '羊蹄山の麓'] @ [1]
6 ['5', '値1-5', '1005', '三重', '三重県伊賀市忍者村'] ! 6 ['5', '値1-5', '1o05', '二重', '三重県伊賀市忍者村'] @ [2, 3]
7 ['6', '値1-6', '1006', '新潟', '星峠の棚田'] ! 7 ['6', '値1-6', '1006', '新烏', '星峠の棚田'] @ [3]
8 ['7', '値1-7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町'] 8 ['7', '値1-7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町']
● Count & Row number
same lines : 2
left side only (<): 0 :-- Row Numbers -->: []
right side only (>): 0 :-- Row Numbers -->: []
with differences (!): 5 :-- Row Number Pairs -->: [(2, 2), (4, 4), (5, 5), (6, 6), (7, 7)]
''')