In [None]:
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import csv
from getpass import getpass
import pandas as pd 
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import dash_renderer
from dash.dependencies import Input, Output, State
import re
import sys
import os

In [None]:
def get_tweet_info(tweet):
    """Extract the displayed fields from one tweet card.

    Parameters
    ----------
    tweet : selenium WebElement
        Root element of a single tweet card; every lookup below is an
        XPath relative to it.

    Returns
    -------
    tuple of str or None
        ``(username, postdate, text, reply_cnt, retweet_cnt, like_cnt)``.
        ``None`` is returned when any of the expected sub-elements is
        missing, so the caller can skip the card instead of aborting the
        whole scrape (the collection loop already tests ``if tweet:``).
    """
    try:
        # the user name is the first <span> below the card root
        username = tweet.find_element_by_xpath('.//span').text
        # NOTE(review): cards without a <time> element (presumably promoted
        # tweets) land in the except branch below -- confirm on a live page
        postdate = tweet.find_element_by_xpath('.//time').get_attribute('datetime')

        # the visible text is split over two sibling divs; grab both and
        # concatenate them
        comment = tweet.find_element_by_xpath('.//div[2]/div[2]/div[1]').text
        responding = tweet.find_element_by_xpath('.//div[2]/div[2]/div[2]').text

        # reply / retweet / like counters are located via their data-testid
        reply_cnt = tweet.find_element_by_xpath('.//div[@data-testid="reply"]').text
        retweet_cnt = tweet.find_element_by_xpath('.//div[@data-testid="retweet"]').text
        like_cnt = tweet.find_element_by_xpath('.//div[@data-testid="like"]').text
    except NoSuchElementException:
        # incomplete card: signal "skip me" rather than crash the scrape
        return None

    text = comment + responding
    return (username, postdate, text, reply_cnt, retweet_cnt, like_cnt)

In [None]:
# Credentials are prompted for at run time so they never live in the notebook
user = input('username: ')
my_password = getpass('Password: ')

# Start a Firefox session and open the Twitter login page
browser = webdriver.Firefox()
browser.get('https://www.twitter.com/login')

# Fill in the login form; pressing RETURN in the password box submits it
login_field = browser.find_element_by_xpath('//input[@name="session[username_or_email]"]')
login_field.send_keys(user)

password_field = browser.find_element_by_xpath('//input[@name="session[password]"]')
password_field.send_keys(my_password)
password_field.send_keys(Keys.RETURN)
sleep(5)  # fixed pause before the search box is looked up

# Type the hashtag into the search box and submit the query
search_box = browser.find_element_by_xpath('//input[@aria-label="Search query"]')
search_box.send_keys("#requestforstartup")
search_box.send_keys(Keys.RETURN)
sleep(2)  # fixed pause after submitting the search

In [None]:
# Scroll through the search results and collect every distinct tweet.
#   data      -- list of tuples returned by get_tweet_info, in scrape order
#   tweet_ids -- concatenated field strings already seen, used to drop
#                duplicates (the same card is re-read after each scroll)
data = []
tweet_ids = set()

try:
    last_position = browser.execute_script("return window.pageYOffset;")
    scrolling = True

    while scrolling:
        page_cards = browser.find_elements_by_xpath('//div[@data-testid="tweet"]')
        # only the last 15 cards are inspected on each pass, so the whole
        # page is not re-scanned after every scroll
        for card in page_cards[-15:]:
            tweet = get_tweet_info(card)
            if tweet:
                tweet_id = ''.join(tweet)
                if tweet_id not in tweet_ids:
                    tweet_ids.add(tweet_id)
                    data.append(tweet)

        scroll_attempt = 0
        while True:
            # jump to the bottom of the page and give it time to load more
            browser.execute_script('window.scrollTo(0, document.body.scrollHeight);')
            sleep(2)
            curr_position = browser.execute_script("return window.pageYOffset;")
            if last_position == curr_position:
                scroll_attempt += 1

                # three scrolls in a row without movement: end of results
                if scroll_attempt >= 3:
                    scrolling = False
                    break
                else:
                    sleep(2)  # wait a little longer, then try another scroll
            else:
                last_position = curr_position
                break
finally:
    # Always end the WebDriver session, even when scraping raised.
    # quit() (rather than close(), which only closes the current window)
    # also shuts down the browser and the driver process.
    browser.quit()

In [None]:
# Dump the scraped tweets to allTweets.csv: one header row, then one row per tweet
csv_columns = ['Username', 'postdate', 'text','comments','Retweets','Likes']
with open('allTweets.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv.writer(csv_file).writerows([csv_columns, *data])

In [None]:
# Columns holding Twitter's abbreviated counters ("12", "1,234", "1.2K", "3M")
COUNT_COLUMNS = ['comments', 'Retweets', 'Likes']
_COUNT_MULTIPLIERS = {'K': 1e3, 'M': 1e6}


def parse_count(value):
    """Convert one scraped counter into an int.

    Accepts plain numbers (``5``, ``"12"``), thousands separators
    (``"1,234"``), ``K``/``M`` suffixes in either case (``"1.2K"``,
    ``"3M"``) and missing or blank values, which count as 0.

    The text is parsed explicitly instead of being rewritten into an
    expression for ``pd.eval``: the values come from a scraped web page and
    should never be evaluated as code.

    Raises
    ------
    ValueError
        If the value is not a recognisable number.
    """
    if pd.isna(value):
        return 0
    text = str(value).strip().replace(',', '')
    if not text:
        return 0
    multiplier = _COUNT_MULTIPLIERS.get(text[-1].upper())
    if multiplier is None:
        number = float(text)
    else:
        number = float(text[:-1]) * multiplier
    # round so float noise (e.g. 1.15 * 1e3) cannot truncate one too low
    return int(round(number))


# Load the scraped tweets back from disk. A separate name is used so the
# `data` list produced by the scraping cell is not overwritten.
tweets_raw = pd.read_csv('allTweets.csv')
df = tweets_raw.reindex(
    columns=['Username', 'postdate', 'text','comments','Retweets','Likes'])
# empty cells come back as NaN; show them as 0
df = df.fillna(0)

# turn the counters into real integers so they sort numerically
for count_column in COUNT_COLUMNS:
    df[count_column] = df[count_column].map(parse_count).astype(int)

# sort by comments first, then likes, then retweets (all descending)
df = df.sort_values(['comments','Likes','Retweets'], ascending=False)
df.head(10)

In [None]:
# Dash application that renders the sorted tweets as an interactive table.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# was `_name_`, an undefined name that raised NameError
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# one dict per table row; shared by the table body and its tooltips
# ('records' replaces the deprecated 'rows' shorthand)
table_records = df.to_dict('records')

app.layout = html.Div([
    # banner describing the tool
    html.Div([
        dcc.Markdown('''
              > This  software searches Twitter for the term *request for startup* and extracts relevant information about tweets,
              >such as author, date, tweet contents, number of likes, number of retweets and number of discussions.
              > *Tweets are sorted by total number of retweets, likes, and discussions.*
              >Created by Aysel Havutcu.
         ''')
    ], style={'background':' #c4e3ed'}),

    dash_table.DataTable(
        style_data={
            'whiteSpace': 'normal',
            'height': 'auto',
            'lineHeight': '15px'
        },
        data=table_records,
        columns=[{'id': c, 'name': c} for c in df.columns],
        style_table={'minWidth': '100%','height': 500,'overflowY': 'auto'},
        # clamp every cell to a fixed height; the full content is still
        # available through the tooltip below
        css=[{
            'selector': '.dash-spreadsheet td div',
            'rule': '''
            line-height: 15px;
            max-height: 30px; min-height: 30px; height: 30px;
            display: block;
            overflow-y: hidden;
        '''
        }],
        # one tooltip per cell showing its complete value
        tooltip_data=[
            {
                column: {'value': str(value), 'type': 'markdown'}
                for column, value in row.items()
            } for row in table_records
        ],
        # None keeps the tooltip visible for as long as the pointer stays
        # over the cell
        tooltip_duration=None,
        # left-align text in every column for readability
        style_cell={'textAlign': 'left'},
        # zebra striping
        style_data_conditional=[
            {
                'if': {'row_index': 'odd'},
                'backgroundColor': 'rgb(248, 248, 248)'
            }
        ],
        style_header={
            'backgroundColor': 'rgb(230, 230, 230)',
            'fontWeight': 'bold'
        }
    )
])

# was `_name_ == '_main_'`, which could never start the server
if __name__ == '__main__':
    app.run_server(debug=False)