# Analysis on the number of twitters in different states of US in first 30 months

In this notebook, we would plot the choropleths number of the tweets in each state states in each week. We would also plot the percentage of change in the number of tweets in certain weeks compare to the average number of tweets in that states <br>
The significant increase in the number of tweets during the US presidents election which would affects the average number of tweets. We would only use 30 weeks as the total time period range from March 19 to the middle of October and the average number of tweets in each states would be calculated for the 30 weeks.

In [2]:
#import the important library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json as js
import ast
import folium
import re
from datetime import datetime
from datetime import timedelta
import branca.colormap as cm
from branca.colormap import linear
from folium.plugins import TimeSliderChoropleth
import geopandas as gpd

In [3]:
#use the written gird geojson data
state_df = gpd.read_file('us-state.json')

In [6]:
#read the file of tweets by country
covid_tweets_states_df = pd.read_csv("us_states_weekly_counts.csv")
del covid_tweets_states_df['Unnamed: 0']

In [7]:
joined_df = pd.merge(covid_tweets_states_df,state_df,how = 'outer',left_on = 'States',right_on = 'id')
del joined_df['id']
joined_df =joined_df[joined_df['Week_index']<31]
joined_df

Unnamed: 0,States,Count,Week_index,name,geometry
0,CA,691.000000,0,California,"POLYGON ((-123.23326 42.00619, -122.37885 42.0..."
1,CA,531.571429,1,California,"POLYGON ((-123.23326 42.00619, -122.37885 42.0..."
2,CA,383.428571,2,California,"POLYGON ((-123.23326 42.00619, -122.37885 42.0..."
3,CA,229.285714,3,California,"POLYGON ((-123.23326 42.00619, -122.37885 42.0..."
4,CA,824.857143,4,California,"POLYGON ((-123.23326 42.00619, -122.37885 42.0..."
...,...,...,...,...,...
2376,WY,6.285714,26,Wyoming,"POLYGON ((-109.08084 45.00207, -105.91517 45.0..."
2377,WY,5.428571,27,Wyoming,"POLYGON ((-109.08084 45.00207, -105.91517 45.0..."
2378,WY,11.142857,28,Wyoming,"POLYGON ((-109.08084 45.00207, -105.91517 45.0..."
2379,WY,11.285714,29,Wyoming,"POLYGON ((-109.08084 45.00207, -105.91517 45.0..."


In [8]:
#function to get the time in second for the area
def get_date_index(df):
    if df['Week_index'] == 0:
        return int(datetime.strptime('2020-03-19', '%Y-%m-%d').timestamp())
    else:
        timedel = timedelta(weeks = df['Week_index'] )
        time_week = datetime.strptime('2020-03-19', '%Y-%m-%d') + timedel
        return int(time_week.timestamp())
    
joined_df['Date_Sec'] = joined_df.apply(get_date_index,axis = 1)

In [9]:
#create a dictionary to store the average of each grid
state_avg_dict = {}
state_list = joined_df['States'].unique().tolist()
state_idx = range(len(state_list))

for i in state_idx:
    id = state_list[i]
    temp_df = joined_df[joined_df['States'] == id]
    temp_avg = temp_df['Count'].mean()
    state_avg_dict[id] = temp_avg
    
joined_df['State_Avg'] = joined_df['States'].apply(lambda x: state_avg_dict[x])                                       

joined_df

Unnamed: 0,States,Count,Week_index,name,geometry,Date_Sec,State_Avg
0,CA,691.000000,0,California,"POLYGON ((-123.23326 42.00619, -122.37885 42.0...",1584590400,1411.824885
1,CA,531.571429,1,California,"POLYGON ((-123.23326 42.00619, -122.37885 42.0...",1585195200,1411.824885
2,CA,383.428571,2,California,"POLYGON ((-123.23326 42.00619, -122.37885 42.0...",1585800000,1411.824885
3,CA,229.285714,3,California,"POLYGON ((-123.23326 42.00619, -122.37885 42.0...",1586404800,1411.824885
4,CA,824.857143,4,California,"POLYGON ((-123.23326 42.00619, -122.37885 42.0...",1587009600,1411.824885
...,...,...,...,...,...,...,...
2376,WY,6.285714,26,Wyoming,"POLYGON ((-109.08084 45.00207, -105.91517 45.0...",1600315200,8.018433
2377,WY,5.428571,27,Wyoming,"POLYGON ((-109.08084 45.00207, -105.91517 45.0...",1600920000,8.018433
2378,WY,11.142857,28,Wyoming,"POLYGON ((-109.08084 45.00207, -105.91517 45.0...",1601524800,8.018433
2379,WY,11.285714,29,Wyoming,"POLYGON ((-109.08084 45.00207, -105.91517 45.0...",1602129600,8.018433


In [16]:
#create a function to get the percentage change in the value
def get_the_change_percentage(df):
    
    current_state_avg = df['State_Avg']
    
    #if the avg is zero which means there is nothing in the places
    if current_state_avg == 0.0:
        return 0.0
    else:
        current_state_count =df['Count']
        delta = (current_state_count - current_state_avg)/current_state_avg*100
        return delta
    
joined_df['Normalized_change']=joined_df.apply(get_the_change_percentage,axis = 1) 

In [17]:
#create geo info stirng
pls_df = joined_df[['geometry']]
pls_gdf = gpd.GeoDataFrame(pls_df)
pls_gdf = pls_gdf.drop_duplicates().reset_index() 

In [18]:
#function to get the opacity of the area
def get_opacity(df):
    if df['Count'] == 0:
        return 0.00
    else:
        return 0.7
joined_df['Opacity'] = joined_df.apply(get_opacity,axis = 1)

#get the color code
max_colour = max(joined_df['Count'])
min_colour = min(joined_df['Count'])
cmap = cm.linear.YlOrRd_07.scale(min_colour, max_colour)
joined_df['Colour'] = joined_df['Count'].map(cmap)

#create style dictionary
idf_list = joined_df['States'].unique().tolist()
idf_idx = range(len(idf_list))

style_dict = {}
for i in idf_idx:
    id = idf_list[i]
    result = joined_df[joined_df['States'] == id]
    inner_dict = {}
    for c, r in result.iterrows():
        inner_dict[r['Date_Sec']] = {'color': r['Colour'], 'opacity': r['Opacity']}
    style_dict[str(i)] = inner_dict


In [19]:
#create and add the info into the bar
slider_map = folium.Map(location=[36.648693, -95.8974445],max_bounds=True,tiles = 'cartodbpositron',zoom_control=False, zoom_start=4)

widget = TimeSliderChoropleth( 
    data=pls_gdf.to_json(),
    styledict=style_dict,

).add_to(slider_map)
cmap.caption = 'Number of tweets in the area'
widget = cmap.add_to(slider_map)

In [20]:
slider_map

In [23]:
slider_map.save("30_weeks_states_total.html")

In [24]:
#function to get the opacity of the area
def get_opacity(df):
    if df['Normalized_change'] == 0:
        return 0.00
    else:
        return 0.7
joined_df['Opacity'] = joined_df.apply(get_opacity,axis = 1)
joined_df


#get the color code
max_colour = max(joined_df['Normalized_change'])
min_colour = min(joined_df['Normalized_change'])
cmap = cm.linear.RdBu_11.scale(min_colour, max_colour)
joined_df['Colour'] = joined_df['Normalized_change'].map(cmap)

#create style dictionary
idf_list = joined_df['States'].unique().tolist()
idf_idx = range(len(idf_list))

style_dict = {}
for i in idf_idx:
    id = idf_list[i]
    result = joined_df[joined_df['States'] == id]
    inner_dict = {}
    for c, r in result.iterrows():
        inner_dict[r['Date_Sec']] = {'color': r['Colour'], 'opacity': r['Opacity']}
    style_dict[str(i)] = inner_dict


In [25]:
#create and add the info into the bar
slider_map = folium.Map(location=[36.648693, -95.8974445],max_bounds=True,tiles = 'cartodbpositron',zoom_control=False, zoom_start=4)

widget = TimeSliderChoropleth( 
    data=pls_gdf.to_json(),
    styledict=style_dict,

).add_to(slider_map)
cmap.caption = 'Pertcent of change in the number of tweets in the area'
widget = cmap.add_to(slider_map)

In [26]:
slider_map

In [27]:
slider_map.save("30_weeks_states_avg.html")