# Data Visualization 2 - Madrid Weather Data Visualization

### Overview
This Dash app tracks and provides the weather reports for the city of Madrid from 1997 to 2015.

The main data to track in the app are temperature, wind speed and weather events throughout each month of each year in the dashboard.

___
### Business Questions
The main questions that this Dash app tries to answer are:
1. When is the best time or season of the year to visit Madrid?

> From the dashboard, April to May and September to October appear to be the best times of the year to visit Madrid because:
> - The temperature is pretty warm
> - The wind is not too strong
> - There is less rain and there are fewer other extreme weather events

2. How often do extreme weather events happen in Madrid throughout the year?

> From the dashboard, it seems like there is no significant increase in the number of extreme weather occurrences.
> Still, rainy weather occurs pretty often from October to March every year.

___
### Data Source
The `Madrid Daily Weather` data source is downloaded from [Maven Analytics Data Playground](https://www.mavenanalytics.io/data-playground) and made available on GitHub to be integrated into this Jupyter notebook.

___
### Dash App Features
The Dash app provides some interactive features for a more granular viewing experience:
- A `Year` slider to quickly scroll through each year's weather report
- A `Plot Mode` radio selection to toggle between a single-axis and a dual-axis view of temperature and wind speed


In [1]:
 # import libs
import plotly.express as px
import pandas as pd
import numpy as np
import plotly.io as pio
from plotly.subplots import make_subplots
from dash import Dash 
from dash import html, dcc 
from dash import callback, Output, Input
from datetime import datetime
pio.templates.default = 'plotly_white'

In [2]:
# Load the Madrid Weather dataset from its hosted CSV into a DataFrame.
# The URL is split across lines for readability only; the pieces concatenate
# to the exact same address.
madrid_weather_csv_url = (
    'https://raw.githubusercontent.com/viethngn/'
    'CEU_MSc_BA_ECBS5251_Data_Visualization_2/main/final_project/'
    'Madrid_Daily_Weather%201997-2015.csv'
)
df = pd.read_csv(madrid_weather_csv_url)
# Notebook preview of the first rows (trailing semicolon suppresses the output)
df.head().T;

In [3]:
# Drop rows without a mean temperature. The explicit copy makes the result an
# independent frame, so adding columns below does not trigger pandas'
# SettingWithCopyWarning for writing to a filtered slice.
df = df[df['Mean TemperatureC'].notna()].copy()

# Parse every CET date string once and derive all time columns from the result
# (previously each string was parsed three separate times).
cet_dates = pd.to_datetime(df.CET.apply(lambda x: datetime.strptime(x, '%Y-%m-%d')))
df['month'] = cet_dates.dt.month.astype(int)
df['year'] = cet_dates.dt.year.astype(int)
# Month label such as 'Jan-2005'; used later as the grouping key for events
df['month_year'] = cet_dates.dt.strftime('%b-%Y')

# The raw events column name carries a leading space; give it a clean name
df = df.rename(columns={" Events": "event"})
# Duplicate the event column so one copy can be grouped on and the other counted
df['event_cat'] = df['event']
# Notebook preview of the first rows (trailing semicolon suppresses the output)
df.head().T;

In [4]:
# Build the weather-events table: one row per (month_year, event) pair holding
# the number of non-null event_cat rows in that group.
event_df = (
    df.groupby(['month_year', 'event'])['event_cat']
    .count()
    .reset_index()
)
# month_year is still a '<Mon>-<YYYY>' string at this point, so the year is the
# piece after the dash; extract it before the column is converted to datetimes.
event_df['year'] = [int(str(label).split('-')[1]) for label in event_df['month_year']]
event_df['month_year'] = pd.to_datetime(event_df['month_year'], format="%b-%Y")
# Notebook preview of the first rows (trailing semicolon suppresses the output)
event_df.head().T;

In [5]:
# Support function to create dual y-axis plot
def plotly_dual_axis(data, title="", y1="", y2="", x=""):
    """Return a figure with two line series that share x but use separate y-axes.

    Args:
        data: Data passed to ``px.line`` for both series.
        title: Title of the combined figure.
        y1: Column plotted against the primary (left) y-axis; also its axis title.
        y2: Column plotted against the secondary y-axis; also its axis title.
        x: Column used for the shared x-axis; also its axis title.

    Returns:
        The figure created by ``make_subplots`` containing both line traces.
    """
    # Build each series as its own express figure, with markers on the lines
    primary = px.line(data, x=x, y=y1, markers=True)
    secondary = px.line(data, x=x, y=y2, markers=True)
    # Point the second series at the secondary y-axis
    secondary.update_traces(yaxis="y2")

    # Host figure whose single cell supports a secondary y-axis
    combined = make_subplots(specs=[[{"secondary_y": True}]])
    combined.add_traces(primary.data + secondary.data)

    # Title and axis labels
    combined.update_layout(
        title=title,
        yaxis={"title": y1},
        yaxis2={"title": y2},
        xaxis={"title": x},
    )

    # Recolor: set each trace's line colour from its marker colour, so the two
    # series do not keep the identical line colour assigned by px.line
    def _line_color_from_marker(trace):
        return trace.update(line={"color": trace.marker.color})

    combined.for_each_trace(_line_color_from_marker)

    return combined

In [6]:
# create the Dash app
# Store titles (shared axis labels, also read by the update callback)
temp_title = "Temperature (Celsius)"
wind_title = "Wind Speed (Km/h)"
time_title = "Date"

# Year shown when the dashboard first loads
default_year = 2005
# Plain Python ints for the slider bounds (the pandas results are numpy scalars)
min_year, max_year = int(df.year.min()), int(df.year.max())

# Initialize the 2 plots with the default year's rows only, so the data shown
# actually matches the "in <year>" titles instead of covering every year
fig = plotly_dual_axis(df[df.year == default_year],
                       title=f"Temperature & Wind Speed by Month in {default_year}",
                       x='CET', y1='Mean TemperatureC', y2=' Mean Wind SpeedKm/h')
fig2 = px.bar(event_df[event_df.year == default_year],
              x='month_year', y='event_cat', color='event',
              title=f"Weather Events by Month in {default_year}",
              labels={
                  'month_year': 'Month'
              },
              barmode='group')
fig2.update_layout(title_x=0.5, xaxis_tickformat="%b %Y")

# Start Dash app
app = Dash(__name__)

app.layout = html.Div([
    # Header
    html.H1('Madrid Weather Dashboard',
            style={'textAlign': 'center', "font-family": "sans-serif", "font-size": "36px"}),
    # Label for the Year slider
    html.Label('Set the year:',
               style={'font-weight': 'bold', "text-align": "center", "font-family": "sans-serif"}),
    # Create the Year slider with one mark per year
    dcc.Slider(id='year', min=min_year, max=max_year, value=default_year, step=1, included=False,
               marks={year: str(year) for year in range(min_year, max_year + 1)}),
    # Label for the plot mode
    html.Label('Set plot mode:',
               style={'font-weight': 'bold', "text-align": "center", "font-family": "sans-serif"}),
    # Radio for plot mode selection. The first option's *label* is spelled
    # correctly for display, while its *value* keeps the spelling that the
    # update callback compares against.
    dcc.RadioItems(id='plot_mode',
                   options=[
                       {'label': 'Temperature', 'value': 'Tempurature'},
                       {'label': 'Wind speed', 'value': 'Wind speed'},
                       {'label': 'Dual Temp & Wind', 'value': 'Dual Temp & Wind'},
                   ],
                   value='Dual Temp & Wind', inline=True, style={"font-family": "sans-serif"}),
    # Add the 2 plots
    dcc.Graph(id='temp_wind_graph', figure=fig),
    dcc.Graph(id='event_graph', figure=fig2),
])

# Register the callback: both graphs are rebuilt from the slider and radio values
@app.callback(
    # Outputs, in the order the function returns them
    Output('temp_wind_graph', 'figure'),
    Output('event_graph', 'figure'),
    # Inputs, in the order of the function's parameters
    Input('year', 'value'),
    Input('plot_mode', 'value'),
)
def update(_year, _plot_mode):
    """Build the temperature/wind figure and the events figure for one year.

    Args:
        _year: Value of the ``year`` slider; rows whose ``year`` equals it are plotted.
        _plot_mode: Value of the ``plot_mode`` radio. ``'Dual Temp & Wind'``
            draws both series on two y-axes, ``'Tempurature'`` draws only
            temperature, and any other value draws only wind speed.

    Returns:
        A ``(temperature/wind figure, weather-events figure)`` pair.
    """
    # Restrict both data sets to the selected year once, up front
    yearly_weather = df[df.year == _year]
    yearly_events = event_df[event_df.year == _year]

    if _plot_mode == 'Tempurature':
        # Temperature only ('Tempurature' must match the radio option value)
        weather_fig = px.line(
            yearly_weather,
            x='CET',
            y='Mean TemperatureC',
            labels={'CET': time_title, 'Mean TemperatureC': temp_title},
            markers=True,
        )
        weather_fig.update_layout(title=f"Temperature by Month in {_year}", title_x=0.5)
    elif _plot_mode == 'Dual Temp & Wind':
        # Both series on separate y-axes; the placeholder title is replaced
        # together with the axis titles right below
        weather_fig = plotly_dual_axis(
            yearly_weather,
            title="test",
            x='CET',
            y1='Mean TemperatureC',
            y2=' Mean Wind SpeedKm/h',
        )
        weather_fig.update_layout(
            title=f"Temperature & Wind Speed by Month in {_year}",
            title_x=0.5,
            yaxis=dict(title=temp_title),
            yaxis2=dict(title=wind_title),
            xaxis=dict(title=time_title),
        )
    else:
        # Wind speed only, drawn in red
        weather_fig = px.line(
            yearly_weather,
            x='CET',
            y=' Mean Wind SpeedKm/h',
            labels={'CET': time_title, ' Mean Wind SpeedKm/h': wind_title},
            markers=True,
        )
        weather_fig.update_traces(line_color='red')
        weather_fig.update_layout(title=f"Wind Speed by Month in {_year}", title_x=0.5)

    # Grouped bars: event counts per month for the selected year
    events_fig = px.bar(
        yearly_events,
        x='month_year',
        y='event_cat',
        color='event',
        title=f"Weather Events by Month in {_year}",
        labels={
            'month_year': 'Month',
            'event_cat': 'Number of Events',
            'event': 'Events',
        },
        barmode='group',
    )
    events_fig.update_layout(title_x=0.5, xaxis_tickformat="%b %Y")

    return weather_fig, events_fig

# Start the development server only when this code is executed directly (as a
# script or inside a notebook, where __name__ is "__main__"), so that importing
# the module, e.g. to reuse `app`, does not launch a server as a side effect.
if __name__ == "__main__":
    app.run(debug=True)