In [None]:
import re

import pandas as pd
pd.options.display.max_rows = 6
pd.set_option('display.precision', 2)

from IPython.display import display, Markdown

import plotly.express as px
import plotly.io as pio
pio.templates.default = 'plotly_white'

In [None]:
# Load the VAST Challenge 2011 microblog messages: use the local copy when it
# exists, otherwise download the shared Google Drive file.
# The same parser options are applied to both sources (this assumes the local
# copy is the same file as the download).
READ_OPTIONS = dict(encoding='latin', on_bad_lines='skip')

try:
    # The result must be bound to Messages; everything below depends on it.
    Messages = pd.read_csv('data/vast challenge 2011/Microblogs.csv', **READ_OPTIONS)
except FileNotFoundError:
    share_url = 'https://drive.google.com/file/d/1Y5xWPRKk8D_TNYmZ0ny4cQ83shGM5YqI/view?usp=sharing'
    # The file id is the second-to-last path component of the sharing link.
    VASTOPOLIS_URL = 'https://drive.usercontent.google.com/download?id={}&export=download&authuser=0&confirm=t'.format(share_url.split('/')[-2])

    Messages = pd.read_csv(VASTOPOLIS_URL, **READ_OPTIONS)

# Timestamps that do not match the expected format are coerced to NaT.
Messages.Created_at = pd.to_datetime(Messages.Created_at, format='%m/%d/%Y %H:%M', errors='coerce')

# Discard the rows whose Created_at failed to parse
# (dropna removes every row that has a missing value in any column).
Messages = Messages.dropna().sort_values(by=['Created_at', 'ID'])

# Location holds "latitude longitude" in one string; split it into two float columns.
Messages[['latitude', 'longitude']] = Messages.Location.str.split(' ', n=1, expand=True).astype('float')
Messages = Messages.drop('Location', axis='columns')
# Flip the sign of the longitude — presumably the file stores western
# longitudes as positive numbers; TODO confirm against the data description.
Messages.longitude = -Messages.longitude

# Indexing the messages by the date/time of creation
# (Created_at is kept as a column as well; later cells use both).
Messages.index = Messages.Created_at

Messages

# Symptom-related Message Counts

In [None]:
# Keywords searched for in the message text.
Symptoms = ['fever', 'chill', 'sweat', 'ache', 'pain', 'fatigue', 'cough', 'breath', 'nausea', 'vomit']

# Add one boolean column per keyword: True when the text contains the keyword
# (pandas' default str.contains matching, i.e. case-sensitive substring search).
Messages = Messages.assign(**{keyword: Messages.text.str.contains(keyword) for keyword in Symptoms})

# 'symptoms' is True when at least one of the keyword columns is True.
Messages['symptoms'] = Messages[Symptoms].any(axis='columns')
Messages

In [None]:
# Keyword groups; together they cover every entry of Symptoms exactly once.
# NOTE(review): the meaning of the "ST_" prefix is not stated in the notebook — confirm.
ST_Fever = ['fever']
ST_Chill = ['chill', 'sweat', 'ache', 'fatigue', 'breath']
# Written as a literal list: the former 'pain, cough,v omit'.split(', ') had a
# misplaced space and produced ['pain', 'cough,v omit'].
ST_Pain = ['pain', 'cough', 'vomit']
ST_Nausea = ['nausea']

In [None]:
import plotly.graph_objects as go

def symptom_trend(symptoms, startdate='2011-05-15'):
    """Plot the hourly trend of each symptom keyword as a line chart.

    For every keyword the messages flagged with it are counted per hour and
    the counts are divided by that keyword's own maximum, so every line peaks
    at 1 and the curves can be compared regardless of absolute volume.

    Parameters
    ----------
    symptoms : iterable of str
        Names of boolean keyword columns of the notebook-level ``Messages`` frame.
    startdate : str
        Lower bound of the ``Messages.loc[startdate:]`` slice on the
        Created_at index.

    The figure is shown directly; nothing is returned.
    """
    messages = Messages.loc[startdate:].query('symptoms == True')

    fig = go.Figure(layout=go.Layout(title='Trend of symptoms', legend_title_text='Symptoms', xaxis={'title': 'Date-Hour'}, yaxis={'title': 'Symptoms'}, width=1600, height=1000))

    for symptom in symptoms:
        counts = messages.query(f'{symptom} == True').groupby(pd.Grouper(key='Created_at', freq='h')).size()
        if counts.empty:
            # No message mentions this keyword in the window: there is no peak
            # to normalise by (max() of an empty array raises), so skip it.
            continue
        fig.add_trace(go.Scatter(x=counts.index, y=counts.values / counts.values.max(), mode='lines', name=symptom))

    fig.show()

symptom_trend(Symptoms)

The first spike in fever, pain, and ache appears on May 18. However, compared with the previous days, the increases in pain and ache are not that noticeable.

Fever and cough seem to be the right words to search for the first signs of the outbreak.

In [None]:
# Compare only the fever and cough trends, starting one day before the May 18 spike.
symptom_trend(['fever', 'cough'], startdate='2011-05-17')

# Analysis of the Geographic Distribution

In [None]:
# Show the frame again (truncated by display.max_rows) to recall the available columns.
Messages

In [None]:
# Messages from May 18 onward that mention fever or cough
# (copied so that columns can be added later without touching Messages).
FeverCough = Messages.loc['2011-05-18':].query('fever | cough').copy()

# Map extent: the bounding box of all message coordinates, reused by later cells.
bounds = {
    'east': Messages.longitude.max(),
    'west': Messages.longitude.min(),
    'north': Messages.latitude.max(),
    'south': Messages.latitude.min(),
}

# Plain white background instead of map tiles; the view is clamped to the bounding box.
fig = px.scatter_mapbox(FeverCough, lat='latitude', lon='longitude')
fig.update_mapboxes(bounds=bounds)
fig.update_layout(mapbox_style='white-bg')
fig.show()

In [None]:
import datetime

# Reference instant for the elapsed-hours colour scale: midnight of May 18, 2011.
origin = datetime.datetime(2011, 5, 18, 0, 0, 0)

# Whole hours elapsed since the origin: full days converted to hours plus the
# hours contained in the remaining seconds of the day.
elapsed = FeverCough.Created_at - origin
FeverCough['hours'] = elapsed.dt.days * 24 + elapsed.dt.seconds // 3600

# Same map as before, now coloured by elapsed hours.
fig = px.scatter_mapbox(FeverCough, lat='latitude', lon='longitude', color='hours')
fig.update_mapboxes(bounds=bounds)
fig.update_layout(mapbox_style='white-bg')
fig.show()

In [None]:
# Fever/cough messages of May 18 only. The slice needs an explicit end:
# an open-ended slice would also include the following days.
FeverCough18 = Messages.loc['2011-05-18 00:00:00':'2011-05-18 23:59:59'].query('fever | cough').copy()

# Whole hours elapsed since the origin. The days term is kept so the value
# never wraps around to 0-23 if the selected window is widened later.
elapsed = FeverCough18.Created_at - origin
FeverCough18['hours'] = elapsed.dt.days * 24 + elapsed.dt.seconds // 3600

# The rows are handed to Plotly in reverse chronological order — presumably so
# that the earliest messages are drawn last (on top); confirm.
fig = px.scatter_mapbox(FeverCough18.iloc[::-1], lat='latitude', lon='longitude', color='hours')
fig.update_mapboxes(bounds=bounds)
fig.update_layout(mapbox_style='white-bg')
fig.show()

In [None]:
def show_map(symptoms, start='2011-05-18 00:00:00', end='2011-05-20 23:59:59'):
    """Show the messages mentioning any of ``symptoms`` on a map, coloured by elapsed hours.

    Parameters
    ----------
    symptoms : list of str
        Names of boolean keyword columns of ``Messages``; a message is kept
        when at least one of them is True.
    start, end : str
        Bounds of the ``Messages.loc[start:end]`` slice on the Created_at index.

    Uses the notebook-level ``Messages`` frame and ``bounds`` dict. The colour
    scale is always measured from midnight of May 18, 2011, whatever ``start``
    is. The figure is shown directly; nothing is returned.
    """
    origin = datetime.datetime(2011, 5, 18, 0, 0, 0)

    # Build the query expression, e.g. "fever | cough".
    condition = ' | '.join(symptoms)
    messages = Messages.loc[start:end].query(condition).copy()

    # Whole hours elapsed since the origin (full days plus hours within the day).
    elapsed = messages.Created_at - origin
    messages['hours'] = elapsed.dt.days * 24 + elapsed.dt.seconds // 3600

    # The rows are handed to Plotly in reverse chronological order — presumably
    # so that the earliest messages are drawn last (on top); confirm.
    fig = px.scatter_mapbox(messages.iloc[::-1], lat='latitude', lon='longitude', color='hours',
                            hover_name='ID', hover_data=['ID', 'text', 'hours'])
    # customdata follows the order of hover_data: [0] = ID, [1] = text, [2] = hours.
    fig.update_traces(hovertemplate='Hour %{customdata[2]} (ID: %{customdata[0]}):<br>%{customdata[1]}')
    fig.update_mapboxes(bounds=bounds)
    fig.update_layout(mapbox_style='white-bg')
    fig.show()

In [None]:
# Fever/cough messages from the default start (May 18, 00:00) through 09:00 of the same day.
show_map(['fever', 'cough'], end='2011-05-18 09:00:00')

In [None]:
# Vomit-related messages over the default window (May 18 through May 20).
show_map(['vomit'])

In [None]:
def animate_map(symptoms, start='2011-05-18 00:00:00', end='2011-05-20 23:59:59', duration=2000):
    """Animate, hour by hour, the messages mentioning any of ``symptoms`` on a map.

    Parameters
    ----------
    symptoms : list of str
        Names of boolean keyword columns of ``Messages``; a message is kept
        when at least one of them is True.
    start, end : str
        Bounds of the ``Messages.loc[start:end]`` slice on the Created_at index.
    duration : int
        Value written to both the frame and the transition duration of the
        play button (Plotly expresses these in milliseconds).

    Uses the notebook-level ``Messages`` frame and ``bounds`` dict. Frames are
    keyed by whole hours elapsed since midnight of May 18, 2011. The figure is
    shown directly; nothing is returned.
    """
    origin = datetime.datetime(2011, 5, 18, 0, 0, 0)

    # Build the query expression, e.g. "fever | cough".
    condition = ' | '.join(symptoms)
    messages = Messages.loc[start:end].query(condition).copy()

    # Whole hours elapsed since the origin (full days plus hours within the day).
    elapsed = messages.Created_at - origin
    messages['hours'] = elapsed.dt.days * 24 + elapsed.dt.seconds // 3600

    fig = px.scatter_mapbox(messages, lat='latitude', lon='longitude',
                            hover_name='ID', hover_data=['text', 'Created_at'],
                            animation_frame='hours')
    fig.update_mapboxes(bounds=bounds)
    fig.update_layout(mapbox_style='white-bg')

    # Adjustment of animation speed.
    # Plotly Express only creates the play/pause menu when the figure has more
    # than one frame, so a selection covering a single hour has nothing to adjust.
    if fig.layout.updatemenus:
        fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = duration
        fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = duration
    fig.show()

In [None]:
# Hour-by-hour animation of fever/cough messages from midnight to noon on May 18.
animate_map(['fever', 'cough'], end='2011-05-18 12:00:00')

Reference (Stack Overflow question): "Plotly mapbox layer not displaying image"

https://stackoverflow.com/questions/78166289/plotly-mapbox-layer-not-displaying-image

In [None]:
# Hour-by-hour animation of vomit-related messages from 18:00 on May 19 to the default end (May 20, 23:59:59).
animate_map(['vomit'], start='2011-05-19 18:00')