/
GetTwitter2CSV.py
174 lines (142 loc) · 6.02 KB
/
GetTwitter2CSV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
'''
Created on Dec 29, 2014
@author: xuebin
'''
import twitter
# import json
import sys
import logging
import traceback
import csv
import json
import urllib2
# print twitter.api
'''
handle log
'''
# create logger with 'spam_application'
logger = logging.getLogger('GetTwitter')
logger.setLevel(logging.DEBUG)
# create file handler which logs even debug messages
fh = logging.FileHandler('GetTwitter.log')
fh.setLevel(logging.DEBUG)
# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.ERROR)
# create formatter and add it to the handlers
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)
# add the handlers to the logger
logger.addHandler(fh)
logger.addHandler(ch)
'''
OAUTH
'''
CONSUMER_KEY = ""
CONSUMER_SECRET = ""
OAUTH_TOKEN = ""
OATH_TOKEN_SECRET = ""
auth = twitter.oauth.OAuth(OAUTH_TOKEN,OATH_TOKEN_SECRET,CONSUMER_KEY,CONSUMER_SECRET)
twitter_api = twitter.Twitter(auth=auth)
csvfile = open('tweet.csv', 'w')
fieldnames = ['tweetid','tweetuser','year','month','day','time','timestamp','hashtags','mention','replyuser','retweetuser','text','lat','lon']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
def InsertStatus(search_results):
'''
insert tweet into postgresql table
'''
try:
for statue in search_results:
# print json.dumps(statue)
# print statue
tweetid = statue["id_str"]
tweetuser = statue["user"]["screen_name"].encode('utf-8','ignore')
collected_time = statue["created_at"]
createyear = collected_time.split(" ")[5]
createmonth = collected_time.split(" ")[1]
createday = collected_time.split(" ")[2]
createtime = collected_time.split(" ")[3]
# print statue["entities"]["hashtags"]
if len(statue["entities"]["hashtags"])>0:
# hashtags = json.dumps(statue["entities"]["hashtags"])
hashtags = ''.join(hashtag["text"]+',' for hashtag in statue["entities"]["hashtags"])
else:
hashtags = None
if len(statue["entities"]["user_mentions"])>0:
mention = ''.join(user["screen_name"]+',' for user in statue["entities"]["user_mentions"])
# mention = json.dumps(statue["entities"]["user_mentions"])
else:
mention = None
if statue["in_reply_to_screen_name"]:
replyuser = statue["in_reply_to_screen_name"].encode('utf-8','ignore')
else:
replyuser = None
if "retweeted_status" in statue:
retweetuser = statue["retweeted_status"]["user"]["screen_name"].encode('utf-8','ignore')
else:
retweetuser = None
text = statue["text"].encode('utf-8','ignore')
if statue["geo"]<>None:
lat = statue["geo"]["coordinates"][0]
lon = statue["geo"]["coordinates"][1]
else:
lat = -999.99
lon = -999.99
try:
writer.writerow({'tweetid': tweetid,
'tweetuser': tweetuser,
'year':createyear,
'month':createmonth,
'day':createday,
'time':createtime,
'timestamp':collected_time,
'hashtags':hashtags,
'mention':mention,
'replyuser':replyuser,
'retweetuser':retweetuser,
'text':text,
'lat':lat,
'lon':lon
})
# [tweetid,tweetuser,createyear,createmonth,createday,createtime,hashtags,mention,replyuser,retweetuser,text])
except:
exc_type, exc_value, exc_traceback = sys.exc_info()
lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
print ''.join('!! ' + line for line in lines) # Log it or whatever here
logger.info(''.join('!! ' + line for line in lines) )
continue
except :
exc_type, exc_value, exc_traceback = sys.exc_info()
lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
print ''.join('!! ' + line for line in lines) # Log it or whatever here
logger.info(''.join('!! ' + line for line in lines) )
def getTWeet():
try:
count = 100
lang = "en"
# q = "WhatAFeeling"
geocode = "38.435092,-78.8697548,50mi"
search_results = twitter_api.search.tweets(count=count,lang=lang,geocode=geocode)
statuses = search_results["statuses"]
# print json.dumps(search_results)
next_url = search_results["search_metadata"]["next_results"]
since_id = next_url.split("=")[1].split("&")[0]
# print since_id
# print len(statuses)
for _ in range(0,20):
search_results = twitter_api.search.tweets(count=count,lang=lang,geocode=geocode,max_id =since_id )
next_url = search_results["search_metadata"]["next_results"]
since_id = next_url.split("=")[1].split("&")[0]
# print since_id
statuses+=search_results["statuses"]
# print "Length of statuses", len(statuses)
# print len(statuses)
InsertStatus(statuses)
except :
exc_type, exc_value, exc_traceback = sys.exc_info()
lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
print ''.join('!! ' + line for line in lines) # Log it or whatever here
logger.info(''.join('!! ' + line for line in lines) )
getTWeet()