# Data Visualization 2 - Madrid Weather Data Visualization

In [1]:
 # import libs
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
from dash import Dash
from dash import html, dcc
from dash import callback, Output, Input
from dash.exceptions import PreventUpdate
from plotly.subplots import make_subplots
pio.templates.default = 'plotly_white'

In [2]:
# Load the Madrid daily weather records (1997-2015) into a DataFrame.
# The source location is kept in a named constant so it is easy to spot and swap.
DATA_URL = 'https://raw.githubusercontent.com/viethngn/CEU_MSc_BA_ECBS5251_Data_Visualization_2/main/final_project/Madrid_Daily_Weather%201997-2015.csv'
df = pd.read_csv(DATA_URL)
# trailing ';' keeps the transposed preview from being rendered as cell output
df.head().T;

In [3]:
# filter missing data
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the loaded data (which raises SettingWithCopyWarning).
df = df[df['Mean TemperatureC'].notna()].copy()

# add time columns
# Parse the CET date strings once and derive every time column from the result,
# instead of re-parsing each string three times through a Python-level apply.
cet_dates = pd.to_datetime(df['CET'], format='%Y-%m-%d')
df['month'] = cet_dates.dt.month.astype('int64')
df['year'] = cet_dates.dt.year.astype('int64')
# abbreviated month name + year, e.g. "Jan-1997"; used later as a grouping key
df['month_year'] = cet_dates.dt.strftime('%b-%Y')
# duplicate of the ' Events' column (note the leading space in the source
# column name); it is the column that gets counted when events are aggregated
df['event_cat'] = df[' Events']
df.head().T

Unnamed: 0,0,1,2,3,4
CET,1997-01-01,1997-01-02,1997-01-03,1997-01-04,1997-01-05
Max TemperatureC,7.0,7.0,5.0,7.0,2.0
Mean TemperatureC,4.0,3.0,3.0,3.0,0.0
Min TemperatureC,2.0,0.0,2.0,-1.0,-1.0
Dew PointC,5.0,6.0,5.0,-2.0,2.0
MeanDew PointC,3.0,3.0,1.0,-3.0,0.0
Min DewpointC,2.0,0.0,-1.0,-4.0,-3.0
Max Humidity,100.0,100.0,100.0,86.0,100.0
Mean Humidity,95.0,92.0,85.0,63.0,95.0
Min Humidity,76.0,71.0,70.0,49.0,86.0


In [4]:
# Summarise the filtered frame: index range, column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6809 entries, 0 to 6811
Data columns (total 27 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   CET                          6809 non-null   object 
 1   Max TemperatureC             6809 non-null   float64
 2   Mean TemperatureC            6809 non-null   float64
 3   Min TemperatureC             6809 non-null   float64
 4   Dew PointC                   6809 non-null   float64
 5   MeanDew PointC               6809 non-null   float64
 6   Min DewpointC                6809 non-null   float64
 7   Max Humidity                 6809 non-null   float64
 8    Mean Humidity               6809 non-null   float64
 9    Min Humidity                6809 non-null   float64
 10   Max Sea Level PressurehPa   6809 non-null   int64  
 11   Mean Sea Level PressurehPa  6809 non-null   int64  
 12   Min Sea Level PressurehPa   6809 non-null   int64  
 13   Max VisibilityKm      

In [5]:
# Count the rows recorded for each (month, event type) pair, then enrich the
# result in a single chain:
#   ln_count   - natural log of the count
#   year       - integer year taken from the "Mon-YYYY" label
#   month_year - the label converted to a real timestamp (first day of the month)
# `year` is listed before `month_year` because it needs the original string label.
event_df = (
    df.groupby(['month_year', ' Events'])['event_cat']
    .count()
    .reset_index()
    .assign(
        ln_count=lambda counts: np.log(counts['event_cat']),
        year=lambda counts: counts['month_year'].apply(
            lambda label: int(str(label).split('-')[1])
        ),
        month_year=lambda counts: pd.to_datetime(counts['month_year'], format="%b-%Y"),
    )
)
event_df.head().T

Unnamed: 0,0,1,2,3,4
month_year,1997-04-01 00:00:00,1997-04-01 00:00:00,1998-04-01 00:00:00,1998-04-01 00:00:00,1998-04-01 00:00:00
Events,Rain,Rain-Thunderstorm,Fog,Rain,Rain-Hail-Thunderstorm
event_cat,7,1,2,12,1
ln_count,1.94591,0.0,0.693147,2.484907,0.0
year,1997,1997,1998,1998,1998


In [6]:
# helper that builds a line plot with two y-axes
def plotly_dual_axis(data, title="", y1="", y2="", x=""):
    """Plot two columns of ``data`` as lines against a shared x-axis.

    Parameters
    ----------
    data : frame handed to ``px.line`` for both series.
    title : title of the combined figure.
    y1 : column drawn against the primary y-axis.
    y2 : column drawn against the secondary y-axis.
    x : column used for the shared x-axis.

    Returns
    -------
    The ``make_subplots`` figure holding both line traces, with the axis
    titles set to the column names that were passed in.
    """
    # One standalone line figure per series; the second is re-targeted to "y2".
    primary_fig = px.line(data, x=x, y=y1, markers=True)
    secondary_fig = px.line(data, x=x, y=y2, markers=True)
    secondary_fig.update_traces(yaxis="y2")

    # Host figure with a secondary y-axis, populated with the traces of both.
    dual_fig = make_subplots(specs=[[{"secondary_y": True}]])
    dual_fig.add_traces(primary_fig.data + secondary_fig.data)

    dual_fig.update_layout(
        title=title,
        xaxis=dict(title=x),
        yaxis=dict(title=y1),
        yaxis2=dict(title=y2),
    )

    def _line_colour_from_marker(trace):
        # Recolour: set the line colour from the trace's marker colour so the
        # two series do not end up drawn in overlapping colours.
        trace.update(line=dict(color=trace.marker.color))

    dual_fig.for_each_trace(_line_colour_from_marker)

    return dual_fig

In [13]:
# create the Dash app
# Axis titles; the `update` callback reads these module-level names as well.
temp_title = "Temperature (Celsius)"
wind_title = "Wind Speed (Km/h)"
time_title = "Date"

# Year pre-selected in the dropdown. The initial figures are filtered to this
# year so that the data they show matches their titles.
default_year = 2005

fig = plotly_dual_axis(df[df.year == default_year],
                       title=f"Temperature & Wind Speed by Month in {default_year}",
                       x='CET', y1='Mean TemperatureC', y2=' Mean Wind SpeedKm/h')
fig.update_layout(title_x=0.5, yaxis=dict(title=temp_title),
                  yaxis2=dict(title=wind_title), xaxis=dict(title=time_title))
fig2 = px.bar(event_df[event_df.year == default_year],
              x='month_year', y='event_cat', color=' Events',
              title=f"Weather Events by Month in {default_year}",
              labels={
                  'month_year': 'Month',
                  'event_cat': 'Number of Events'
              },
              barmode='group')
fig2.update_layout(title_x=0.5, xaxis_tickformat="%b %Y")

app = Dash(__name__)

# Dash style dictionaries use camelCased CSS property names.
label_style = {'fontWeight': 'bold', 'textAlign': 'center', 'fontFamily': 'sans-serif'}

app.layout = html.Div([
    html.H1('Madrid Weather Dashboard',
            style={'textAlign': 'center', 'fontFamily': 'sans-serif', 'fontSize': '36px'}),
    html.Label('Set the year:', style=label_style),
    # Plain sorted ints for the options; clearable=False so the callback is
    # never sent an empty (None) year.
    dcc.Dropdown(id='year',
                 options=sorted(int(year) for year in df.year.unique()),
                 value=default_year,
                 clearable=False,
                 style={'width': '30%'}),
    html.Label('Set the plot mode:', style=label_style),
    # The displayed label is spelled correctly, but the value keeps the
    # original spelling because the callback compares against that exact string.
    dcc.RadioItems(id='plot_mode',
                   options=[
                       {'label': 'Temperature', 'value': 'Tempurature'},
                       {'label': 'Wind speed', 'value': 'Wind speed'},
                       {'label': 'Dual Temp & Wind', 'value': 'Dual Temp & Wind'},
                   ],
                   value='Dual Temp & Wind',
                   inline=True,
                   style={'fontFamily': 'sans-serif'}),
    dcc.Graph(id='temp_wind_graph', figure=fig),
    dcc.Graph(id='event_graph', figure=fig2),
])

@app.callback(
    Output('temp_wind_graph', 'figure'),
    Output('event_graph', 'figure'),
    Input('year','value'),
    Input('plot_mode','value')
)
def update(_year, _plot_mode):
    """Rebuild both dashboard figures for the selected year and plot mode.

    Parameters
    ----------
    _year : value of the 'year' dropdown.
    _plot_mode : value of the 'plot_mode' radio items. 'Dual Temp & Wind'
        draws both series on two y-axes, 'Tempurature' draws the mean
        temperature only, and any other value draws the mean wind speed only.

    Returns
    -------
    tuple of (temperature/wind figure, weather-events bar figure), in the
    order of the declared outputs.

    Raises
    ------
    PreventUpdate
        When no year is selected, so the current figures stay on screen
        instead of being replaced by empty charts titled "... in None".
    """
    if _year is None:
        raise PreventUpdate

    # Filter once; every plot mode works on the same year of daily records.
    year_df = df[df.year == _year]

    if _plot_mode == 'Dual Temp & Wind':
        temp_wind_fig = plotly_dual_axis(year_df,
                                         title=f"Temperature & Wind Speed by Month in {_year}",
                                         x='CET', y1='Mean TemperatureC', y2=' Mean Wind SpeedKm/h')
        # Replace the raw column names on the axes with readable titles.
        temp_wind_fig.update_layout(title_x=0.5,
                                    yaxis=dict(title=temp_title),
                                    yaxis2=dict(title=wind_title),
                                    xaxis=dict(title=time_title))
    elif _plot_mode == 'Tempurature':
        # The misspelling is the option value the radio items actually send.
        temp_wind_fig = px.line(year_df,
                                x='CET', y='Mean TemperatureC',
                                labels={
                                    'CET': time_title,
                                    'Mean TemperatureC': temp_title
                                }, markers=True)
        temp_wind_fig.update_layout(title=f"Temperature by Month in {_year}", title_x=0.5)
    else:
        temp_wind_fig = px.line(year_df,
                                x='CET', y=' Mean Wind SpeedKm/h',
                                labels={
                                    'CET': time_title,
                                    ' Mean Wind SpeedKm/h': wind_title
                                }, markers=True)
        temp_wind_fig.update_traces(line_color='red')
        temp_wind_fig.update_layout(title=f"Wind Speed by Month in {_year}", title_x=0.5)

    event_fig = px.bar(event_df[event_df.year == _year],
                       x='month_year', y='event_cat', color=' Events',
                       title=f"Weather Events by Month in {_year}",
                       labels={
                           'month_year': 'Month',
                           'event_cat': 'Number of Events'
                       },
                       barmode='group')
    event_fig.update_layout(title_x=0.5, xaxis_tickformat="%b %Y")

    return temp_wind_fig, event_fig

# Start the Dash server for the app defined above, with debug mode switched on
app.run(debug = True)