# Python Problem: a Python script that fetches all the tweets posted by the @midasIIITD Twitter handle and dumps the responses into a JSON Lines file.

#### Importing the libraries

 - jsonlines: for saving and loading the data, as required by the task
 - requests: for sending GET requests to the Twitter API
 - requests_oauthlib: for authentication with the Twitter API
 - pandas: for tabular data
 - BeautifulSoup: for web scraping
 - BeautifulTable: for organising JSON data into tables
 - tweepy: to extract the tweets, if needed

In [37]:
import sys
import jsonlines
import requests
from requests_oauthlib import OAuth1
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import tweepy
from beautifultable import BeautifulTable

#### Function to authenticate twitter user API

In [3]:
def twi_auth():
    """Build the OAuth1 object used to sign requests to the Twitter API.

    The four credential strings are blank placeholders in this file;
    presumably they must be replaced with real values before the API
    accepts the signed requests.

    Returns:
        An ``OAuth1`` instance created from the consumer key, consumer
        secret, access key and access secret (in that order).
    """
    credentials = (
        "",  # consumer key
        "",  # consumer secret
        "",  # access key
        "",  # access secret
    )
    return OAuth1(*credentials)

#### Function to get tweet count

 - Uses web scraping to get the total number of tweets of a particular Twitter handle
 - Returns the tweet count

In [5]:
def twi_count(twi_handle):
    """Scrape the total tweet count from a Twitter profile page.

    Args:
        twi_handle: Handle appended to ``https://twitter.com/`` to build
            the profile URL.

    Returns:
        The value of the ``data-count`` attribute of the profile's tweet
        counter, exactly as it appears in the page (a string).

    Exits:
        Terminates the process with status 1 when the page cannot be
        downloaded or when the tweet counter is not present in it.
    """
    # Same network-failure handling as twitterfeed(), so both requests
    # fail in the same way instead of one of them dumping a traceback.
    try:
        page = requests.get('https://twitter.com/' + twi_handle)
    except requests.exceptions.RequestException:
        print('No internet connection')
        sys.exit(1)

    soup = BeautifulSoup(page.text, 'lxml')

    try:
        tweet_box = soup.find(
            'li',
            {'class': 'ProfileNav-item ProfileNav-item--tweets is-active'})
        counter = tweet_box.find('a').find(
            'span', {'class': 'ProfileNav-value'})
        count = counter.get('data-count')
    except AttributeError:
        # find() returns None when an element is missing, so the next
        # attribute access raises AttributeError. Only that case is
        # caught; a bare except would also hide unrelated errors.
        count = None

    # A counter element without a data-count attribute is treated like a
    # missing profile instead of silently returning None to the caller.
    if count is None:
        print('Account name not found...')
        sys.exit(1)

    return count
        

#### Function to get the tweets as a JSON response from the Twitter REST API (using requests)

 - The number of tweets is passed in as a function argument so that all the tweets can be fetched in real time.

In [48]:
def twitterfeed(auth, no_of_tweets, twi_handle):
    """Download the most recent tweets of a user from the Twitter REST API.

    The ``statuses/user_timeline`` endpoint returns at most 200 tweets per
    request, so asking for the full total in one call silently truncates
    the result. The timeline is therefore fetched page by page, walking
    backwards with ``max_id`` until ``no_of_tweets`` tweets have been
    collected or the API has nothing older to return.

    Args:
        auth: Authentication object passed to ``requests.get``.
        no_of_tweets: Number of tweets wanted (an int or a numeric string).
        twi_handle: Screen name of the account, without the leading ``@``.

    Returns:
        A list with the decoded tweet objects, newest first. If the very
        first response is not a list (for example an error payload), that
        decoded payload is returned unchanged.

    Exits:
        Terminates the process with status 1 when a request fails.
    """
    url = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
    page_limit = 200  # largest ``count`` the endpoint honours per request
    wanted = int(no_of_tweets)

    tweets = []
    max_id = None
    while len(tweets) < wanted:
        # Passing the query as ``params`` lets requests do the URL encoding.
        params = {
            'screen_name': twi_handle,
            'tweet_mode': 'extended',
            'count': min(page_limit, wanted - len(tweets)),
        }
        if max_id is not None:
            params['max_id'] = max_id

        try:
            response = requests.get(url, params=params, auth=auth)
        except requests.exceptions.RequestException:
            print('No internet connection')
            sys.exit(1)

        page = response.json()

        if not isinstance(page, list):
            # Not a timeline (e.g. an error object). With nothing collected
            # yet, hand the payload back as a single request would have;
            # otherwise keep what was already downloaded.
            if not tweets:
                return page
            break

        if not page:
            # No older tweets available.
            break

        tweets.extend(page)
        # ``max_id`` is inclusive, so step just below the oldest tweet seen
        # to avoid receiving it again on the next page.
        max_id = min(tweet['id'] for tweet in page) - 1

    return tweets
   
#     auth = auth
#     handle = twi_handle
#     api = tweepy.API(auth)
#     data = tweepy.Cursor(api.user_timeline, id=twi_handle, tweet_mode='extended').items()
#     return data   
   

    

#### save_tweets_into_file accepts the JSON data and saves it into a file named tweets.jsonl

In [54]:
def save_tweets_into_file(data):
    """Write each item of ``data`` as one line of ``tweets.jsonl``.

    The file is opened in write mode, so an existing ``tweets.jsonl`` is
    overwritten.

    Args:
        data: Iterable of JSON-serialisable objects (the decoded tweets).
    """
    with jsonlines.open('tweets.jsonl', mode='w') as out_file:
        out_file.write_all(data)

#### main function to call the required functions in an orderly fashion to get optimum results

In [62]:
def main():
    """Fetch the tweets of the MIDAS IIITD account and store them on disk.

    Steps: build the API credentials, scrape the account's total tweet
    count from its profile page, print that count, download the tweets
    through the API and write them to the JSON Lines file.
    """
    # The profile page is requested with the "@" prefix, while the API's
    # screen_name parameter is given the bare name.
    profile_handle = "@midasIIITD"
    api_handle = "midasIIITD"

    auth = twi_auth()
    no_of_tweets = twi_count(profile_handle)
    print(no_of_tweets)
    data = twitterfeed(auth, no_of_tweets, api_handle)
    save_tweets_into_file(data)


if __name__ == '__main__':
    main()

329


# Displaying the JSON in Tabular Form

In [57]:
# Column name -> list of cell values; one value per tweet is appended to
# every column when the saved tweets are read back.
Table = {
    column: []
    for column in (
        "Text",
        "Created at",
        "Number of Favorites",
        "Number of retweets",
        "Number of images",
    )
}

In [59]:
# Read the saved tweets back (one JSON object per line) and add one row per
# tweet to the Table columns.
with jsonlines.open('tweets.jsonl') as reader:
    for obj in reader:
        # Count the attached media. For a retweet that carries media, the
        # count is taken from the original tweet stored under
        # "retweeted_status".
        if 'retweeted_status' in obj and "media" in obj["retweeted_status"]["entities"]:
            if "extended_entities" in obj["retweeted_status"]:
                # Count the items of the "media" list, as in the branch
                # below. Taking len() of the "extended_entities" dict
                # itself counts its keys rather than the attached media.
                count = len(obj["retweeted_status"]["extended_entities"]["media"])
            else:
                count = 1
        elif "media" in obj["entities"]:
            if "extended_entities" in obj:
                count = len(obj["extended_entities"]["media"])
            else:
                count = 1
        else:
            # No media at all: the table shows the string "None".
            count = "None"

        # After counting media, read the remaining fields and append them to
        # the table under the matching column.
        Table["Number of images"].append(count)
        Table["Text"].append(obj["full_text"])
        Table["Created at"].append(obj["created_at"])
        Table["Number of Favorites"].append(obj["favorite_count"])
        Table["Number of retweets"].append(obj["retweet_count"])

#### Pandas Dataframe for table

In [60]:
# Convert the dictionary of columns into a DataFrame.
df = pd.DataFrame(data=Table)

# Show the full (non-truncated) tweet text. Current pandas expects None for
# "no limit" and rejects negative widths; older releases only accept an
# integer and use -1 for the same purpose, hence the fallback.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

# Number the rows from 1 instead of 0.
df.index = np.arange(1, len(df) + 1)


In [61]:
# Bare expression: as the last statement of a notebook cell it renders the
# DataFrame as the cell's output.
df


Unnamed: 0,Created at,Number of Favorites,Number of images,Number of retweets,Text
1,Sat Apr 06 17:11:29 +0000 2019,0,,2,RT @kdnuggets: Top 8 #Free Must-Read #Books on #DeepLearning #KDN https://t.co/1DtlN91Yjj
2,Sat Apr 06 16:43:27 +0000 2019,12,1,3,@nupur_baghel @PennDATS Congratulation @nupur_baghel on getting admit from @PennDATS. \nShe got 5/10 acceptance. She has worked at @midasIIITD on the #kikichallenge problem. We are glad to know that it inspired her to pursue further research. We wish her a great success ahead. \n#MSc #DataScience #UPenn https://t.co/2daLOHGPBU
3,Fri Apr 05 16:08:37 +0000 2019,8,,1,We have emailed the task details to all candidates who have applied to @midasIIITD internship through IIITD portal. Kindly check your spam folder if you have not received the email. We will evaluate all solutions received until April 10 midnight and announce results by April 14.
4,Fri Apr 05 04:05:11 +0000 2019,0,,16,RT @rfpvjr: Our NAACL paper on polarization in language on Twitter surrounding mass shootings is up on arXiv! https://t.co/g7wiegXxDg\nThis…
5,Fri Apr 05 04:04:43 +0000 2019,0,1,10,RT @kdnuggets: Effective Transfer Learning For NLP https://t.co/Z1m0AzlfVv https://t.co/ccX4Uhxjn8
6,Wed Apr 03 18:31:53 +0000 2019,0,1,57,RT @stanfordnlp: What’s new in @Stanford CS224N Natural Language Processing with Deep Learning for 2019? Question answering—1D CNNs—subword…
7,Wed Apr 03 17:04:32 +0000 2019,0,1,844,"RT @DeepMindAI: Today we're releasing a large-scale extendable dataset of mathematical questions, for training (and evaluating the abilitie…"
8,Wed Apr 03 09:03:40 +0000 2019,0,,16,RT @ylecun: Congratulations Jitendra Malik !\n\nAwards are raining on FAIRies these days... https://t.co/1WNcSeQLZe
9,Wed Apr 03 07:46:02 +0000 2019,0,,4,RT @IIITDelhi: Another chance to take admission in the Ph.D. program in IIIT Delhi. \nAdmissions open for Ph.D. program mathematics \n\nLog on…
10,Tue Apr 02 04:20:13 +0000 2019,8,,1,"Dear @midasIIITD internship candidates who have submitted their solutions to the task, we have sent you an email regarding your results. \n\nStudents who have applied through @IIITDelhi, you will soon be contacted on the internship task. \n\n#MIDAS #Summer #Research #Internship"
