-
Notifications
You must be signed in to change notification settings - Fork 0
/
frequency.py
43 lines (35 loc) · 1.11 KB
/
frequency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import sys
import json
import os
# Characters removed from the right-hand end of every token before counting.
# Same set as the original strip list, with the backslash properly escaped.
_TRAILING_JUNK = '_:/\\|><@#$"&%*^()\''

# Text type returned by json.loads: `unicode` on Python 2, `str` on Python 3.
_TEXT_TYPE = type(u"")


def _term_counts(lines):
    """Count normalized terms in the "text" field of JSON-encoded tweets.

    Args:
        lines: iterable of lines, each expected to hold one JSON document.

    Returns:
        A ``collections.Counter`` mapping each term to its occurrence count.

    Lines that are not valid JSON, documents that are not JSON objects, and
    objects without a string "text" value are skipped.  Each whitespace
    separated token has trailing punctuation stripped and is lowercased;
    tokens that are empty after stripping are not counted.
    """
    from collections import Counter

    counts = Counter()
    for line in lines:
        try:
            tweet = json.loads(line)
        except ValueError:
            # Covers malformed JSON, blank lines and undecodable bytes.
            continue
        if not isinstance(tweet, dict):
            continue
        text = tweet.get("text")
        if not isinstance(text, _TEXT_TYPE):
            continue
        for token in text.split():
            term = token.rstrip(_TRAILING_JUNK).lower()
            # A token made only of punctuation strips down to "" and must
            # not be reported as a term or inflate the total.
            if term:
                counts[term] += 1
    return counts


def main():
    """Print the relative frequency of every term found in a tweet file.

    Reads the file named by ``sys.argv[1]`` (one JSON document per line) and
    prints one ``term frequency`` line per distinct term, where frequency is
    the term's count divided by the count of all terms, rounded to six
    decimal places.  Nothing is printed when no terms are found.

    Raises:
        IndexError: if no file name was given on the command line.
        IOError/OSError: if the file cannot be opened.
    """
    # Binary mode lets json.loads do the decoding, so one undecodable line
    # is skipped instead of aborting the whole read.
    with open(sys.argv[1], "rb") as tweet_file:
        counts = _term_counts(tweet_file)

    total = sum(counts.values())
    for term, count in counts.items():
        frequency = round(float(count) / total, 6)
        if not isinstance(term, str):
            # Python 2 only: encode unicode terms so printing to a pipe
            # cannot fail with UnicodeEncodeError.
            term = term.encode("utf-8", "ignore")
        print("%s %s" % (term, frequency))
# Run the frequency report only when executed as a script, not on import.
if __name__ == "__main__":
    main()