In [1]:
# Import relevant libraries
import pandas as pd
import panel as pn
# Initialise Panel for notebook use, requesting the 'tabulator' extension.
pn.extension('tabulator')
# Imported for its side effect: the cells below call `.hvplot(...)` and
# `.interactive()` on pandas objects (presumably registered by this import).
import hvplot.pandas
import datetime as dt
from datetime import datetime

import warnings
# NOTE(review): this suppresses every warning for the rest of the session,
# including deprecation warnings from pandas/panel — consider narrowing it.
warnings.filterwarnings("ignore")

# DASHBOARD


In [2]:
# Load the sentiment trend CSV that feeds the interactive panels.
# 'timestamp' is used as the index and parsed as dates.
filepath = "tweets_output/tweets_trend.csv"
tweets_data = pd.read_csv(
    filepath,
    index_col='timestamp',
    parse_dates=True,
)

In [3]:
# Move 'timestamp' out of the index into a regular column.
# Guarded and not in-place so that re-running this cell is a no-op: an
# unguarded second reset_index would add a spurious 'index' column, which the
# later melt/resample steps would then treat as sentiment data.
if tweets_data.index.name == 'timestamp':
    tweets_data = tweets_data.reset_index()

In [5]:
# Peek at the 5 most recent rows
tweets_data.tail(5)

Unnamed: 0,timestamp,Negative,Neutral,Positive,score
1315,2009-06-01 01:00:00,809.0,1571.166667,2083.833333,0.285581
1316,2009-06-01 02:00:00,819.333333,1578.541667,2094.083333,0.283785
1317,2009-06-01 03:00:00,829.4375,1587.166667,2104.458333,0.282018
1318,2009-06-01 04:00:00,837.125,1591.166667,2113.625,0.281049
1319,2009-06-01 05:00:00,840.604167,1587.458333,2113.041667,0.280204


In [6]:
# Define sentiment dataframe: every column of the trend data except 'score'
db_sentiment = tweets_data.drop(['score'], axis='columns')

In [7]:
# Reshape to long form: one row per (timestamp, sentiment).
# The frame is rebuilt from `tweets_data` rather than by overwriting
# `db_sentiment` with a melt of itself, so re-running this cell produces the
# same result instead of melting already-melted data.
db_sentiment = (
    pd.melt(tweets_data.drop(columns=['score']), id_vars=['timestamp'])
    .sort_values(by=['timestamp', 'variable'], ignore_index=True)
)

In [8]:
# Preview the first 5 rows of the long-form frame
db_sentiment.head(5)

Unnamed: 0,timestamp,variable,value
0,2009-04-07 06:00:00,Negative,367.741165
1,2009-04-07 06:00:00,Neutral,643.466087
2,2009-04-07 06:00:00,Positive,818.376612
3,2009-04-07 07:00:00,Negative,365.718303
4,2009-04-07 07:00:00,Neutral,640.635416


In [12]:
# Wrap the long-form sentiment frame as an interactive DataFrame so that
# widgets can be used inside expressions built from it, then preview it.
idb_sentiment = db_sentiment.interactive()
idb_sentiment.head(5)



In [13]:
# Give the melted columns display-friendly names
sentiment_column_names = {
    'timestamp': 'Timestamp',
    'variable': 'Sentiment',
    'value': 'Counts',
}
idb_sentiment = idb_sentiment.rename(columns=sentiment_column_names)

In [14]:
# Get first datetime recorded (the earliest value in the Timestamp column)
idb_sentiment.Timestamp.min()



In [15]:
# Get last datetime recorded (the latest value in the Timestamp column)
idb_sentiment.Timestamp.max()



In [16]:
# Define the Panel date slider spanning the first and last day recorded.
# The bounds are derived from the data (midnight of the earliest and latest
# timestamps) instead of being hard-coded, so the slider stays in sync with
# whatever the trend CSV contains.
slider_start = tweets_data['timestamp'].min().normalize().to_pydatetime()
slider_end = tweets_data['timestamp'].max().normalize().to_pydatetime()
# Initial selection, clamped so it always falls inside the available range.
slider_default = min(
    max(dt.date(2009, 5, 7), slider_start.date()),
    slider_end.date(),
)
day_slider = pn.widgets.DateSlider(
    name='day slider',
    start=slider_start,
    end=slider_end,
    step=1,
    value=slider_default,
)



In [17]:
# Show slider: render the widget as this cell's output
day_slider

In [18]:
# Sentiment counts over time, limited to rows dated on or before the day
# currently selected on the slider.
up_to_selected_day = idb_sentiment.Timestamp.dt.date <= day_slider
sentiment_by_time = (
    idb_sentiment[up_to_selected_day]
    .groupby(['Sentiment', 'Timestamp'])['Counts']
    .mean()
)
# Flatten back to a plain table ordered by time for plotting.
sentiment_pipeline = (
    sentiment_by_time
    .to_frame()
    .reset_index()
    .sort_values(by='Timestamp')
    .reset_index(drop=True)
)

In [19]:
# Preview the first 5 rows of the sentiment pipeline
sentiment_pipeline.head(5)

In [20]:
# Plot of Counts against Timestamp, grouped (and coloured) by Sentiment
sentiment_colors = ['#fc4f30', '#e5ae38', '#30a2da']
sentiment_plot = sentiment_pipeline.hvplot(
    x='Timestamp',
    y='Counts',
    by='Sentiment',
    title="sentiment over time",
    line_width=1,
    color=sentiment_colors,
)

In [21]:
# Plot sentiment count over time: render the plot as this cell's output
sentiment_plot

In [22]:
# Sentiment score trend: keep only the timestamp and score columns, then preview
tweets_trend = tweets_data.loc[:, ['timestamp', 'score']]
tweets_trend.head(5)

Unnamed: 0,timestamp,score
0,2009-04-07 06:00:00,0.246305
1,2009-04-07 07:00:00,0.246367
2,2009-04-07 08:00:00,0.246429
3,2009-04-07 09:00:00,0.246492
4,2009-04-07 10:00:00,0.246555


In [23]:
# Define interactive dataframe for sentiment score data, so the day slider
# can be used in the filter expression of the trend pipeline built from it
idf_trend = tweets_trend.interactive()

In [24]:
# Give the trend columns display-friendly names
trend_column_names = {
    'timestamp': 'Timestamp',
    'score': 'Score',
}
idf_trend = idf_trend.rename(columns=trend_column_names)

In [25]:
# Preview the first 5 rows of the interactive trend frame
idf_trend.head(5)



In [26]:
# Sentiment score over time, limited to rows dated on or before the day
# currently selected on the slider.
trend_up_to_selected_day = idf_trend.Timestamp.dt.date <= day_slider
score_by_time = (
    idf_trend[trend_up_to_selected_day]
    .groupby(['Timestamp'])['Score']
    .sum()
)
# Flatten back to a plain table ordered by time for plotting.
trend_pipeline = (
    score_by_time
    .reset_index()
    .sort_values(by='Timestamp')
    .reset_index(drop=True)
)

In [27]:
# Preview the first 5 rows of the trend pipeline
trend_pipeline.head(5)

In [28]:
# Plot of Score against Timestamp for the slider-filtered trend data
trend_plot = trend_pipeline.hvplot(
    x='Timestamp',
    y='Score',
    title="trend over time",
    line_width=1,
    color='teal',
)

In [29]:
# Plot trend data over time: render the plot as this cell's output
trend_plot

In [30]:
# Daily sentiment totals: index by timestamp, sum each calendar day, and
# discard the 'score' column.
db_sentiment_sum = (
    tweets_data
    .set_index('timestamp')
    .resample('D')
    .sum()
    .drop(columns=['score'])
)

In [31]:
# Move the daily 'timestamp' index back into a regular column and preview.
# Guarded so that re-running this cell does not add a spurious 'index'
# column, which the melt in the following cell would treat as a sentiment.
if db_sentiment_sum.index.name == 'timestamp':
    db_sentiment_sum = db_sentiment_sum.reset_index()
db_sentiment_sum.head()

Unnamed: 0,timestamp,Negative,Neutral,Positive
0,2009-04-07,6335.356322,11175.943748,14153.690556
1,2009-04-08,7428.065476,13479.976687,16788.874008
2,2009-04-09,6853.714286,12513.571429,15583.428571
3,2009-04-10,6305.142857,11578.714286,14422.285714
4,2009-04-11,5756.571429,10643.857143,13261.142857


In [32]:
# Reshape the daily totals to long form: one row per (day, sentiment)
db_sentiment_sum = (
    db_sentiment_sum
    .melt(id_vars='timestamp')
    .sort_values(by=['timestamp', 'variable'], ignore_index=True)
)
db_sentiment_sum.head(5)

Unnamed: 0,timestamp,variable,value
0,2009-04-07,Negative,6335.356322
1,2009-04-07,Neutral,11175.943748
2,2009-04-07,Positive,14153.690556
3,2009-04-08,Negative,7428.065476
4,2009-04-08,Neutral,13479.976687


In [33]:
# Wrap the daily totals as an interactive DataFrame so the day slider can be
# used in expressions built from it, then preview it.
idb_sentiment_sum = db_sentiment_sum.interactive()
idb_sentiment_sum.head(5)



In [34]:
# Give the daily-totals columns display-friendly names and preview
totals_column_names = {
    'timestamp': 'Timestamp',
    'variable': 'Sentiment',
    'value': 'Totals',
}
idb_sentiment_sum = idb_sentiment_sum.rename(columns=totals_column_names)
idb_sentiment_sum.head(5)



In [36]:
# Sentiment totals for exactly the day currently selected on the slider
on_selected_day = idb_sentiment_sum.Timestamp.dt.date == day_slider
totals_by_sentiment = (
    idb_sentiment_sum[on_selected_day]
    .groupby(['Timestamp', 'Sentiment'])['Totals']
    .sum()
)
totals_pipeline = totals_by_sentiment.to_frame()

In [37]:
# Show results: render the totals pipeline as this cell's output
totals_pipeline

In [38]:
# Define sentiment totals plot: a bar chart of the selected day's totals
totals_colors = ['#fc4f30', '#e5ae38', '#30a2da']
totals_plot = totals_pipeline.hvplot(
    kind='bar',
    color=totals_colors,
    title='Sentiment Total counts per day',
)


In [39]:
# Plot sentiment totals: render the bar chart as this cell's output
totals_plot

In [40]:
# Load the forecasting models' output; 'timestamp' is used as the index and
# parsed as dates. Note that `filepath` is rebound to the predictions file here.
filepath = "tweets_output/tweets_pred.csv"
df_pred = pd.read_csv(
    filepath,
    index_col='timestamp',
    parse_dates=True,
)

In [41]:
# Explore data structure: print the index range, column names, non-null
# counts and dtypes of the predictions frame
df_pred.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 589 entries, 2009-06-01 06:00:00 to 2009-06-25 18:00:00
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    589 non-null    int64  
 1   Actual Value  589 non-null    float64
 2   Arma          589 non-null    float64
 3   Arima         589 non-null    float64
 4   Forecaster    589 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 27.6 KB


In [42]:
# Drop the leftover 'Unnamed: 0' column that came in with the CSV.
# errors='ignore' keeps this cell re-runnable: once the column is gone, a
# second run is a no-op instead of raising KeyError.
df_pred = df_pred.drop(columns=['Unnamed: 0'], errors='ignore')

In [43]:
# Move 'timestamp' out of the index into a regular column.
# Guarded so that re-running this cell does not add a spurious 'index'
# column, which the melt below would treat as another model's predictions.
if df_pred.index.name == 'timestamp':
    df_pred = df_pred.reset_index()

In [44]:
# Reshape to long form: one row per (timestamp, series), where each series is
# one of the non-timestamp columns of the predictions frame
df_pred_melt = (
    df_pred
    .melt(id_vars=['timestamp'])
    .sort_values(by=['timestamp', 'variable'], ignore_index=True)
)

In [45]:
# Define interactive dataframe for the long-form predictions
# (the pipeline built from it below does not reference any widget)
idf_pred = df_pred_melt.interactive()

In [46]:
# Preview the first 5 rows of the interactive predictions frame
idf_pred.head(5)



In [47]:
# Give the prediction columns display-friendly names
prediction_column_names = {
    'timestamp': 'Prediction timestamp',
    'variable': 'Model',
    'value': 'Predicted Score',
}
idf_pred = idf_pred.rename(columns=prediction_column_names)

In [48]:
# Predicted score per model over time
score_by_model_and_time = (
    idf_pred
    .groupby(['Model', 'Prediction timestamp'])['Predicted Score']
    .sum()
)
# Flatten back to a plain table ordered by prediction time for plotting.
pred_pipeline = (
    score_by_model_and_time
    .to_frame()
    .reset_index()
    .sort_values(by='Prediction timestamp')
    .reset_index(drop=True)
)


In [49]:
# Plot of Predicted Score against Prediction timestamp, grouped by Model
pred_plot = pred_pipeline.hvplot(
    x='Prediction timestamp',
    y='Predicted Score',
    by='Model',
    title="Prediction for 3 weeks going forward",
    line_width=1,
)

In [50]:
# Plot results for predicted values over time: render the plot as this cell's output
pred_plot

In [55]:
# Create the template for the interactive dashboard and deploy it on a local
# server.
#   sidebar: title, word-cloud image and the day slider that drives the plots
#   main:    row 1 = sentiment over time + per-day totals,
#            row 2 = score trend + model predictions
template = pn.template.FastListTemplate(
    title='Interactive Dashboard',
    sidebar=[
        pn.pane.Markdown("# Twitter Sentiment Analysis"),
        pn.pane.Markdown("## Word Cloud"),
        # The same WordCloud.png pane was previously listed twice in a row;
        # it is shown once.
        pn.pane.PNG('tweets_output/WordCloud.png', sizing_mode='scale_both'),
        pn.pane.Markdown("## Settings"),
        day_slider,
    ],
    main=[
        pn.Row(
            pn.Column(sentiment_plot.panel(width=900, height=400), margin=(0, 1)),
            pn.Column(totals_plot.panel(width=500, height=400)),
        ),
        pn.Row(
            pn.Column(trend_plot.panel(width=700, height=400), margin=(0, 1)),
            pn.Column(pred_plot.panel(width=700, height=400)),
        ),
    ],
    header_background="#bbc4c8",
)
# Launch the dashboard on a local server from the notebook.
template.show()
# Also mark the template as servable (presumably for `panel serve`); the
# trailing ';' suppresses the cell output.
template.servable();



Launching server at http://localhost:64631
