-
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtextfile_analysis.py
36 lines (30 loc) · 1004 Bytes
/
textfile_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# -*- coding: utf-8 -*-
import os
import sys
import collections
import string
script_name = sys.argv[0]

# Placeholder results; printed unchanged when no file could be analysed.
res = {
    "total_lines": "",
    "total_characters": "",
    "total_words": "",
    "unique_words": "",
    "special_characters": "",
}


def analyze_text(data):
    """Return simple statistics about the text *data*.

    Args:
        data: The full text to analyse, with line endings already
            normalised to "\\n" (as text-mode ``open`` does by default).

    Returns:
        A dict with these integer entries:
        ``total_lines`` -- number of newline characters in *data*;
        ``total_characters`` -- characters left after removing spaces,
        minus one per newline;
        ``total_words`` -- number of whitespace-separated tokens;
        ``unique_words`` -- number of distinct tokens;
        ``special_characters`` -- number of characters that appear in
        ``string.punctuation``.
    """
    # Text-mode reads translate every line ending to "\n", so counting
    # os.linesep ("\r\n" on Windows) would always report zero lines there.
    total_lines = data.count("\n")
    words = data.split()
    punctuation = set(string.punctuation)
    return {
        "total_lines": total_lines,
        "total_characters": len(data.replace(" ", "")) - total_lines,
        "total_words": len(words),
        "unique_words": len(set(words)),
        "special_characters": sum(1 for ch in data if ch in punctuation),
    }


try:
    textfile = sys.argv[1]
    with open(textfile, "r", encoding="utf_8") as f:
        data = f.read()
    res.update(analyze_text(data))
except IndexError:
    # No file argument was supplied on the command line.
    print('Usage: %s TEXTFILE' % script_name)
except IOError:
    print('"%s" cannot be opened.' % textfile)
print(res)