# Twitter Real Time Data Collection

#### The main objective here is to collect real-time Twitter data and store it in an AWS S3 bucket.

In [1]:
# Import the necessary libraries.
import tweepy as tw    
import pandas as pd
import os
import boto3         
import botocore
import pickle
import credentials as cc

In [16]:
# Connect to the Twitter API.
# The four OAuth values are read from the local `credentials` module (imported as `cc`),
# so no secrets are written in the notebook itself.
auth = tw.OAuthHandler(cc.consumer_key, cc.consumer_secret)
auth.set_access_token(cc.access_token, cc.access_token_secret)
# NOTE(review): wait_on_rate_limit=True presumably makes tweepy wait instead of
# failing when the rate limit is reached — confirm against the tweepy docs.
api = tw.API(auth, wait_on_rate_limit=True)

In [17]:
# Search parameters.
# search_words: the hashtag to query for.
# date_since:   date string passed as `since=` to the 15-item sample search below.
search_words = "#facebook"
date_since = "2020-09-06"

In [18]:
# Build the final query string: the "-filter:retweets" search operator is
# appended to the hashtag so that retweets are excluded from the results.
new_search = f"{search_words} -filter:retweets"

In [19]:
# Who is tweeting?
# Pull a small sample (15 items) of matching English-language tweets posted
# since `date_since`.
tweets = tw.Cursor(
    api.search,
    q=new_search,
    lang="en",
    since=date_since,
).items(15)

# One [screen_name, location] pair per sampled tweet, in the order returned.
users_locs = [
    [author.screen_name, author.location]
    for author in (tweet.user for tweet in tweets)
]
users_locs

[['LoettaPaulsen', 'Seattle, WA'],
 ['ThatSmileyLady', 'ATLBGATLNYATL?!'],
 ['NcsVentures', 'Atlanta, GA'],
 ['PaxAutomica', ''],
 ['CashYonny', ''],
 ['nomanrttc', 'Rajshahi, Bangladesh'],
 ['5Hoquat', 'Earth2'],
 ['OhmsLaw78', 'Iowa, USA'],
 ['AdilNadeem347', 'Lahore'],
 ['Tarun_Strings', ''],
 ['vima_marketing', 'Barcelona, Spain'],
 ['AdilNadeem347', 'Lahore'],
 ['ElsicaStar', 'England, United Kingdom'],
 ['bmurphypointman', 'Bay Area, CA'],
 ['MailerLite', 'Global']]

In [20]:
#getting data from twitter and saving it in a list
tweets = tw.Cursor(api.search,
                       q=new_search,
                       lang="en").items(1000)

In [21]:
all_tweets = [tweet.text for tweet in tweets]

In [22]:
# Create an S3 client.
# No credentials or region are passed explicitly here.
S3 = boto3.client('s3')
# Name of the bucket the collected tweets are written to.
BUCKET_NAME = 'twitter-analytics-bot'

In [23]:
# Persist the collected tweets to S3.
# Step 1: pickle the list of tweet texts into a bytes payload.
serializedListObject = pickle.dumps(all_tweets)
# Step 2: upload the payload to BUCKET_NAME under the key 'myList001'.
# The call is the cell's last expression, so its response dict is displayed.
S3.put_object(
    Body=serializedListObject,
    Bucket=BUCKET_NAME,
    Key='myList001',
)

{'ResponseMetadata': {'RequestId': '1E39B01F9782A0F6',
  'HostId': 'giz0VGgiUhuaKPx4hUuCNUNeL09Pd1iftghxp+dN5Kthc2JmA2A0xmqsahgzmh6S3AXG2HTuc9M=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'giz0VGgiUhuaKPx4hUuCNUNeL09Pd1iftghxp+dN5Kthc2JmA2A0xmqsahgzmh6S3AXG2HTuc9M=',
   'x-amz-request-id': '1E39B01F9782A0F6',
   'date': 'Thu, 24 Sep 2020 12:23:17 GMT',
   'etag': '"f2726648ef389f6c3a7875ee061354c9"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 1},
 'ETag': '"f2726648ef389f6c3a7875ee061354c9"'}

In [24]:
# #Retrieving the object stored in s3 bucket
# #Read the object stored in key 'myList001'
# object = S3.get_object(Bucket=BUCKET_NAME,Key='myList001')
# serializedObject = object['Body'].read()
# #Deserialize the retrieved object
# myList = pickle.loads(serializedObject)

In [25]:
# #getting data from twitter again and saving it in a list
# tweets = tw.Cursor(api.search,
#                        q=new_search,
#                        lang="en").items(10000)

In [26]:
# all_tweets_new = [tweet.text for tweet in tweets]

In [27]:
# #appending the new tweets to the previous tweets
# for i in all_tweets_new:
#     myList.append(i)

In [28]:
# #storing the list in s3 bucket
# #Serialize the object 
# serializedListObject = pickle.dumps(myList)
# #Write to Bucket named BUCKET_NAME and 
# #Store the list using key 'mylist001'
# S3.put_object(Bucket=BUCKET_NAME,Key='myList001',Body=serializedListObject)

{'ResponseMetadata': {'RequestId': '1QBM3J4QCNCWDY7P',
  'HostId': 'JoG5E2MpnOx9JQgm/ggFPfN3G4vxm9qQgYyb6Q7ILWUYoGy8ZwjgpxPE3bhgXPZ++qPR4DvsymY=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'JoG5E2MpnOx9JQgm/ggFPfN3G4vxm9qQgYyb6Q7ILWUYoGy8ZwjgpxPE3bhgXPZ++qPR4DvsymY=',
   'x-amz-request-id': '1QBM3J4QCNCWDY7P',
   'date': 'Thu, 24 Sep 2020 12:53:55 GMT',
   'etag': '"fede37ac528d377b238491e37c401f74"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"fede37ac528d377b238491e37c401f74"'}