twitter_scraper.py
# This script searches for tweets matching a particular keyword and writes selected fields to a CSV file.
import sys
import csv

import tweepy
# Replace the API key and secret below with your own application's key and secret.
# AppAuthHandler (application-only auth) is used instead of OAuthHandler because it gets
# roughly 2.5x higher rate limits on the search endpoint.
auth = tweepy.AppAuthHandler('j2UAZfXuk6iitAjnLjbFcmn0y', 'Q9X7g4eAhyElO8u5VI183QwRCUF1sXrZs8m9poGt6Q1pmN4cOw')
api = tweepy.API(auth, wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True)
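# With application-only auth the standard v1.1 search endpoint allows roughly 450 requests per
# 15-minute window (vs. 180 with user auth); wait_on_rate_limit makes tweepy sleep automatically
# when that window is exhausted.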
if (not api):
    print("Can't Authenticate")
    sys.exit(-1)
def clean(val):
    # Return an empty string for missing values. Under Python 3 the csv module handles Unicode
    # text directly, so the value is returned as-is rather than utf-8-encoded to bytes
    # (encoded bytes would show up as b'...' in the output file).
    return val if val else ""
searchQuery = '#techsytalk'  # This is for your hashtag(s), separate by comma
maxTweets = 80000            # Large max number of tweets to collect
tweetsPerQry = 100           # The max the API permits per request
fName = 'myfile.csv'         # The CSV file where your tweets will be stored
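# Note: the standard search API only indexes roughly the last 7 days of tweets, so the run may
# stop well before maxTweets is reached for low-volume hashtags.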
# If results from a specific ID onwards are required, set since_id to that ID;
# else default to no lower limit and go as far back as the API allows.
sinceId = None
# If only results below a specific ID are required, set max_id to that ID;
# else default to no upper limit and start from the most recent tweet matching the search query.
max_id = -1
tweetCount = 0
# print("Downloading max {0} tweets".format(maxTweets))
# Open the output file once and create the writer inside the same "with" block,
# so everything written is flushed and the file is closed when the block exits.
with open(fName, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
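    # Pagination strategy: each request asks for tweets older than the oldest tweet seen so far
    # (max_id), so the loop walks backwards through the search results.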
    while tweetCount < maxTweets:
        try:
            if (max_id <= 0):
                if (not sinceId):
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry)
                else:
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                            since_id=sinceId)
            else:
                if (not sinceId):
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                            max_id=str(max_id - 1))
                else:
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                            max_id=str(max_id - 1),
                                            since_id=sinceId)
            if not new_tweets:
                print("No more tweets found")
                break
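            # CSV columns: tweet timestamp, screen name, tweet text, account creation date,
            # follower count, friend count, total statuses, location, geo_enabled flag,
            # account language, time zone, and retweet count.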
            for tweet in new_tweets:
                csvwriter.writerow([tweet.created_at, clean(tweet.user.screen_name),
                                    clean(tweet.text), tweet.user.created_at,
                                    tweet.user.followers_count, tweet.user.friends_count,
                                    tweet.user.statuses_count, clean(tweet.user.location),
                                    tweet.user.geo_enabled, tweet.user.lang,
                                    clean(tweet.user.time_zone), tweet.retweet_count])
            tweetCount += len(new_tweets)
            # print("Downloaded {0} tweets".format(tweetCount))
            max_id = new_tweets[-1].id
        except Exception as e:
            # Stop on any error rather than retrying the same failing request forever
            print("some error : " + str(e))
            break

print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName))