# Python Problem: a Python script that fetches all the tweets posted by the midas@IIITD Twitter handle (`@midasIIITD`) and dumps the responses into a JSON Lines file.

#### Importing the libraries

 - jsonlines: as required in the task, for saving and loading the data
 - requests: for sending GET requests to the Twitter API
 - requests_oauthlib: for authentication on the Twitter API
 - pandas: for table data
 - BeautifulSoup: for web scraping
 - BeautifulTable: for organising the JSON data into tables
 - tweepy (optional): an alternative way to extract the tweets, if needed

In [37]:
import sys
import jsonlines
import requests
from requests_oauthlib import OAuth1
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import tweepy
from beautifultable import BeautifulTable

#### Function to authenticate twitter user API

In [3]:
def twi_auth():
    """Build the OAuth1 object used to sign requests to the Twitter API.

    The four credential strings are blank in this file; presumably they are
    meant to be filled in with the keys of a Twitter developer app before
    the script is run.

    Returns:
        An ``OAuth1`` instance created from the consumer key, consumer
        secret, access key and access secret (in that order).
    """
    credentials = (
        "",  # consumer key
        "",  # consumer secret
        "",  # access key
        "",  # access secret
    )
    return OAuth1(*credentials)

#### Function to get tweet count

 - Uses web scraping to get the total number of tweets for a particular Twitter handle
 - Returns the tweet count

In [5]:
def twi_count(twi_handle):
    """Scrape the profile page of *twi_handle* and return its tweet count.

    Args:
        twi_handle: Handle appended to ``https://twitter.com/`` to build the
            profile URL.

    Returns:
        The value of the ``data-count`` attribute of the profile's tweet
        counter element, exactly as it appears in the HTML.

    Exits the process with status 1, after printing a message, when the
    counter element cannot be located in the downloaded page.
    """
    page = requests.get('https://twitter.com/' + twi_handle)
    soup = BeautifulSoup(page.text, 'lxml')

    try:
        tweet_box = soup.find(
            'li',
            {'class': 'ProfileNav-item ProfileNav-item--tweets is-active'},
        )
        counter = tweet_box.find('a').find('span', {'class': 'ProfileNav-value'})
        count = counter.get('data-count')
    except AttributeError:
        # find() returns None for a missing element, so the chained lookup
        # fails with AttributeError. Catching only that keeps Ctrl-C and
        # unrelated errors from being reported as a missing account.
        print('Account name not found...')
        sys.exit(1)

    return count
        

#### Function to get the tweets as a JSON response from the Twitter REST API (using requests; a tweepy alternative is kept commented out below)

 - The number of tweets is passed in as a function argument so that all of the tweets are fetched in real time.

In [48]:
def twitterfeed(auth, no_of_tweets, twi_handle):
    """Request a user's timeline from the Twitter REST API.

    Args:
        auth: Authentication object handed to ``requests.get`` to sign the
            request.
        no_of_tweets: Value sent as the ``count`` query parameter.
        twi_handle: Value sent as the ``screen_name`` query parameter.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked, so whatever JSON the API answered with is returned as is.

    Exits the process with status 1, after printing a message, when the
    request itself fails with a ``requests`` exception.
    """
    url = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
    # Let requests build the query string so the values are URL-encoded
    # instead of being pasted into the URL verbatim.
    params = {
        'screen_name': twi_handle,
        'tweet_mode': 'extended',
        'count': no_of_tweets,
    }

    try:
        response = requests.get(url, params=params, auth=auth)
    except requests.exceptions.RequestException:
        print('No internet connection')
        sys.exit(1)

    return response.json()
   
#     auth = auth
#     handle = twi_handle
#     api = tweepy.API(auth)
#     data = tweepy.Cursor(api.user_timeline, id=twi_handle, tweet_mode='extended').items()
#     return data   
   

    

#### save_tweets_into_file accepts the JSON data and saves it into a file named tweets.jsonl

In [54]:
def save_tweets_into_file(data):
    """Write every item of *data* to ``tweets.jsonl``, one JSON value per line.

    Args:
        data: Iterable of JSON-serialisable objects (the tweets).

    The file is opened in write mode, so an existing ``tweets.jsonl`` is
    overwritten.
    """
    with jsonlines.open('tweets.jsonl', mode='w') as writer:
        writer.write_all(data)

#### main function to call the required functions in an orderly fashion to get optimum results

In [55]:
def main():
    """Fetch the tweets of the @midasIIITD account and dump them to disk.

    Steps, in order: build the OAuth1 credentials, scrape the account's
    tweet count from its profile page, request that many tweets from the
    API, and write the response to the JSON Lines file.
    """
    twi_handle = "@midasIIITD"
    # The API call is given the handle without the leading '@', while the
    # profile-page scrape is given the '@' form.
    screen_name = twi_handle.lstrip("@")

    auth = twi_auth()
    no_of_tweets = twi_count(twi_handle)
    data = twitterfeed(auth, no_of_tweets, screen_name)
    save_tweets_into_file(data)


if __name__ == '__main__':
    main()

# Displaying the JSON in Tabular Form

#### Function to count the Media in a Tweet

In [30]:
def mediaCount(itr):
    """Count the media entries attached to a tweet dictionary.

    When the tweet has an ``extended_entities`` key, the length of its
    ``media`` list is used and ``entities`` is not consulted. Otherwise the
    length of ``entities['media']`` is used. If the tweet carries a
    ``retweeted_status``, the media count of that nested tweet is added
    recursively.

    Missing keys and ``None`` values are treated as "no media" rather than
    raising, so a tweet dict without ``entities`` simply counts as 0.

    Args:
        itr: Tweet as a dict (one decoded JSON object).

    Returns:
        Total number of media entries found, as an int.
    """
    if 'extended_entities' in itr:
        container = itr['extended_entities']
    else:
        container = itr.get('entities')

    media = (container or {}).get('media') or []
    count = len(media)

    # A retweet wraps the original tweet; include the original's media too.
    retweeted = itr.get('retweeted_status')
    if retweeted:
        count += mediaCount(retweeted)

    return count

#### Using BeautifulTable to add Data and print it.

In [33]:
# Render the saved tweets as a text table: one row per tweet with its text,
# creation time, like count, retweet count and number of media entries.
table = BeautifulTable()
table.column_headers = ['Text', 'Date and Time', 'number of favorites/likes', 'number of retweets', 'No. of images']

# The tweets are dumped with jsonlines into 'tweets.jsonl' (one JSON object
# per line), so they are read back with jsonlines from that same file
# rather than parsed as a single JSON document.
with jsonlines.open('tweets.jsonl') as reader:
    data = list(reader)

for itr in data:

    # Show None instead of 0 when the tweet has no media.
    count = mediaCount(itr)
    if count == 0:
        count = None

    tweet_data = [itr['full_text'], itr['created_at'], itr['favorite_count'], itr['retweet_count'],
                  count]

    table.append_row(tweet_data)

print(table)

+--------------------------------------------------+-------+------+-----+------+
|                       Text                       | Date  | numb | num | No.  |
|                                                  | and T | er o | ber | of i |
|                                                  |  ime  | f fa |  of | mage |
|                                                  |       | vori |  re |  s   |
|                                                  |       | tes/ | twe |      |
|                                                  |       | like | ets |      |
|                                                  |       |  s   |     |      |
+--------------------------------------------------+-------+------+-----+------+
| RT @kdnuggets: Top 8 #Free Must-Read #Books on # | Sat A |  0   |  2  | None |
|    DeepLearning #KDN https://t.co/1DtlN91Yjj     | pr 06 |      |     |      |
|                                                  |  17:1 |      |     |      |
|                           