In [7]:
#Imports
import plotly.graph_objects as go
import pandas as pd
import json
from collections import Counter
import plotly
import plotly.offline as offline

In [8]:
#Preprocessing functions
def load_file(name):
    """Load a JSON Lines file (one JSON document per line).

    Parameters
    ----------
    name : str
        Path of the file to read.

    Returns
    -------
    list
        The decoded value of every non-blank line, in file order.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    # Be explicit about UTF-8 so the result does not depend on the platform's
    # default encoding when the records contain non-ASCII text.
    with open(name, encoding='utf-8') as f:
        # Blank lines (e.g. a doubled trailing newline) hold no record: skip
        # them instead of letting json.loads fail on an empty string.
        return [json.loads(line) for line in f if line.strip()]

def load_data(file_names, data_dir='data/'):
    """Load several JSON Lines files and concatenate their records.

    Parameters
    ----------
    file_names : iterable of str
        File names, each relative to ``data_dir``. Records are concatenated in
        the iteration order of this argument.
    data_dir : str, optional
        Prefix prepended verbatim to every file name (so it must include the
        trailing separator). Defaults to ``'data/'``.

    Returns
    -------
    list
        All records returned by ``load_file`` for every file, in order.
    """
    data = []
    for name in file_names:
        # extend() appends in place; ``data = data + ...`` copied the whole
        # accumulated list again for every file.
        data.extend(load_file(data_dir + name))
    return data

def count_tweets(data, df):
    """Store the number of tweets per country in ``df["COUNT"]``.

    Parameters
    ----------
    data : iterable of dict
        Decoded tweets. A tweet is counted under
        ``tweet['location']['country']``; tweets whose location is missing or
        None, or whose location has no country, are ignored.
    df : pandas.DataFrame
        Table with a "COUNTRY" column. Modified in place: "COUNT" is created
        or overwritten for every row, with 0 for countries without tweets.
    """
    c = Counter()
    for tweet in data:
        # .get() tolerates a missing 'location' key as well as an explicit None
        location = tweet.get('location')
        if location is None:
            continue
        country = location.get('country')
        if country is not None:
            c[country] += 1
    # Assigning a plain list is positional, so this no longer requires df to
    # carry the default 0..n-1 index (the per-row df.at[i, ...] lookup did).
    df["COUNT"] = [c[name] for name in df["COUNTRY"]]
        
def count_keywords(data, df):
    """Store, per country, how often each keyword is a tweet's first keyword.

    Parameters
    ----------
    data : iterable of dict
        Decoded tweets. Only ``tweet['keywords'][0]`` is counted, under
        ``tweet['location']['country']``. Tweets without a location or with
        no keywords are ignored.
    df : pandas.DataFrame
        Table with a "COUNTRY" column. Modified in place: for every keyword
        seen for a row's country, the column named after the keyword is set
        to its count on that row (the column is created if needed). Cells for
        (country, keyword) pairs that never occur are not written.
    """
    # Group the tweets by country in a single pass instead of rescanning the
    # whole data set once per row of df.
    per_country = {}
    for tweet in data:
        location = tweet.get('location')
        keywords = tweet.get('keywords')
        if location is None or not keywords:
            # nothing to attribute the tweet to, or no keyword to count
            continue
        country = location.get('country')
        per_country.setdefault(country, Counter())[keywords[0]] += 1

    # Snapshot the (label, country) pairs first: columns are added to df below.
    # Using the real index labels avoids assuming a default 0..n-1 index.
    rows = list(zip(df.index, df["COUNTRY"]))
    for idx, name in rows:
        for key, n in per_country.get(name, {}).items():
            df.at[idx, key] = n


In [9]:
#Data load and preprocessing
# One file per day. This must be a list, not a set: iteration order of a set
# of strings is arbitrary, and the position in data_tab decides which slider
# step a day ends up on.
file_names = ['covid1.json', 'covid2.json', 'covid3.json',
              'covid4.json', 'covid5.json', 'covid6.json',
              'covid7.json', 'covid8.json']
df = pd.read_csv('data/world.csv',header=0, sep=r'\s*;\s*', engine='python')
data_tab = []
for file_name in file_names:
    data = load_file('data/' + file_name)
    # Count into a fresh copy of the country table. count_keywords only writes
    # the keywords it finds, so reusing one frame for every day would carry
    # keyword counts of earlier days over into later ones.
    day_df = df.copy()
    count_tweets(data, day_df)
    count_keywords(data, day_df)
    # 'covid<N>.json' -> 'out<N>'; slicing after the 'covid' prefix also works
    # for file numbers with more than one digit.
    out_name = 'out' + file_name.split('.')[0][5:]
    day_df.to_csv(out_name + '.csv', index=False)
    # NOTE(review): pandas documents that regex separators are prone to
    # ignoring quoted data; a country name containing a comma would be quoted
    # by to_csv and may not survive this round trip -- confirm against world.csv.
    out = pd.read_csv(out_name + '.csv', sep=r'\s*,\s*', engine='python')
    data_tab.append(out)

# Totals over all days go last in data_tab (the plot's 'Total' step).
# df is still the untouched country table here, so it ends up holding the totals.
data = load_data(file_names)
count_tweets(data, df)
count_keywords(data, df)
df.to_csv('total.csv', index=False)
out = pd.read_csv('total.csv', sep=r'\s*,\s*', engine='python')
data_tab.append(out)

In [10]:
#Plot
# Keywords listed in the hover text. A tuple (not a set) keeps the lines in
# the same order on every run.
words = ('coronavirus','sars','wuhan','2019ncov','mers','wuflu','sars2',
         'covid19','covid','covid-19','#coronavirus','sarscov19','2019-ncov')

# One choropleth trace per table in data_tab (the days, then the total).
data_slider = []
for trace_no, out in enumerate(data_tab):
    # Everything is turned into strings so the columns can be concatenated
    # into the hover text below.
    for col in out.columns:
        out[col] = out[col].astype(str)
    out['text'] = ""
    for word in words:
        if word in out.columns:
            out['text'] +='<br>' + word +': ' + out[word]

    data_one_day=dict(
        type='choropleth',
        locations = out['CODE'],
        z = out['COUNT'],
        text = out['text'],
        colorscale = 'Reds',
        autocolorscale=False,
        marker_line_color='black',
        marker_line_width=0.5,
        colorbar_title = 'Number of tweets',
        # Only the first trace is shown initially, matching the slider's
        # active=0. Without this every trace (and colorbar) is drawn stacked
        # until the slider is first moved.
        visible = (trace_no == 0),
    )
    data_slider.append(data_one_day)

# One slider step per trace: each step shows exactly its own trace.
# The last trace is the total over all days.
n_traces = len(data_slider)
labels = ['Day {}'.format(i) for i in range(n_traces - 1)] + ['Total']
steps = []
for trace_no, label in enumerate(labels):
    visibility = [False] * n_traces
    visibility[trace_no] = True
    steps.append(dict(method='restyle',
                      args=['visible', visibility],
                      label=label))

sliders = [dict(active=0, pad={"t": 1}, steps=steps)]  
layout = dict(
    title_text='COVID TWEETS',
    sliders=sliders,
    geo=dict(
        showframe=False,
        showcoastlines=True,
        projection_type='equirectangular'
    ),
    annotations = [dict(
        x=0.55,
        y=0.1,
        text='Source: <a href="https://zenodo.org/record/4053573#.X33rQmgzbIX">\
            Zenodo.org</a>',
        showarrow = False
    )]
)
fig = dict(data=data_slider, layout=layout) 
# Writes a standalone HTML page and opens it in the browser.
# To render inline in the notebook instead, use plotly.offline.iplot(fig).
offline.plot(fig, auto_open=True, filename='html/index.html', validate=True)



'html/index2.html'