<a href="https://colab.research.google.com/github/utkuatay/Plotly-Dash-Visualization/blob/main/Plotly_Dash_Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install darts

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import time
import plotly.tools as tls
from plotly.subplots import make_subplots
from plotly.offline import iplot
import plotly.graph_objects as go
#import plotly.offline as py
import plotly.express as px
from pandas.core.reshape.melt import melt
import plotly.io as pio

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from darts import TimeSeries
from darts.models import *
from darts.metrics import *
from darts.dataprocessing.transformers import Scaler

In [None]:
# Raw input tables, read from files addressed relative to the working directory.
ghg_emission = pd.read_excel(io="owid-co2-data.xlsx")
temp_data = pd.read_csv(filepath_or_buffer="temperature-anomaly.csv")
co2_data = pd.read_csv(filepath_or_buffer="CO2_Emissions.csv")

In [None]:
# Country & GHG emission data cleaning: keep the gas columns, restrict to 1950 onwards.
gas_columns = ['co2', 'methane', 'nitrous_oxide', 'total_ghg']
emission = ghg_emission[['country', 'year'] + gas_columns]
emission = emission[emission['year'] >= 1950]

# Select the columns with a list: indexing a GroupBy with several bare keys
# (an implicit tuple) is deprecated in pandas.
# Note: sum() yields 0 for a year in which every value of a column is missing.
# NOTE(review): values are summed over every row of 'country'; if the file also
# holds aggregate rows (regions, world totals) they are counted too - confirm.
emission_yearly = emission.groupby('year')[gas_columns].sum().reset_index()

# The last expression of the cell is rendered as a table by the notebook.
emission_yearly

    year         co2   methane  nitrous_oxide   total_ghg
0   1950   42047.473      0.00           0.00       0.000
1   1951   44816.864      0.00           0.00       0.000
2   1952   45556.830      0.00           0.00       0.000
3   1953   46842.317      0.00           0.00       0.000
4   1954   48173.601      0.00           0.00       0.000
..   ...         ...       ...            ...         ...
67  2017  234500.508  32843.70       12421.13  188757.789
68  2018  238741.382  33484.96       12361.10  193107.589
69  2019  239925.740  33980.52       12457.14  194614.819
70  2020  228766.484      0.00           0.00       0.000
71  2021  240609.645      0.00           0.00       0.000

[72 rows x 5 columns]



Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [None]:
# Preview the first rows of the raw temperature-anomaly table.
temp_data.head(n=5)

Unnamed: 0,Entity,Code,Year,Global average temperature anomaly relative to 1961-1990,Upper bound (95% confidence interval) of the annual temperature anomaly,Lower bound (95% confidence interval) of the annual temperature anomaly
0,Global,,1850,-0.417659,-0.246115,-0.589203
1,Global,,1851,-0.23335,-0.054832,-0.411868
2,Global,,1852,-0.229399,-0.049416,-0.409382
3,Global,,1853,-0.270354,-0.1107,-0.430009
4,Global,,1854,-0.29163,-0.150436,-0.432824


In [None]:
# Temperature anomaly data cleaning:
# keep the 'Global' entity, shorten the column names, keep 1950 onwards.
temp = (
    temp_data
    .loc[temp_data['Entity'] == 'Global']
    .rename(columns={'Year':'year', 'Global average temperature anomaly relative to 1961-1990':'anomaly'})
    .loc[:, ['year', 'anomaly']]
    .loc[lambda frame: frame['year'] >= 1950]
)

In [None]:
# CO2 and temperature anomaly graph: CO2 on the primary (left) y-axis,
# temperature anomaly on the secondary (right) y-axis.
fig = make_subplots(specs=[[{'secondary_y': True}]])

fig.add_trace(
    go.Scatter(x=emission_yearly['year'], y=emission_yearly['co2'], mode='lines+markers', name='Co2'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=temp['year'], y=temp['anomaly'], mode='lines+markers', name='Temperature'),
    secondary_y=True,
)

# NOTE(review): the title says "Average" while emission_yearly is produced with
# groupby(...).sum() - confirm which wording is intended.
fig.update_layout(title_text='Average GHG emissions (CO2) & Temperature Anomaly')
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='GHG emissions in million tonnes of Co2-equivalents', secondary_y=False)
# Axis label spelling corrected ("Celicious" -> "Celsius").
fig.update_yaxes(title_text='Temperature Anomaly in Celsius', secondary_y=True)
fig.update_layout(legend=dict(x=0.05, y=0.9))

# Save a standalone HTML copy next to the notebook, then render inline.
pio.write_html(fig, file='Co2 & anomaly.html', auto_open=True)
fig.show()



In [None]:
# N2O, CH4 & temperature anomaly graph, restricted to the years 1990-2019.
emission_yearly_2 = emission_yearly[(emission_yearly['year'] >= 1990) & (emission_yearly['year'] < 2020)]
temp_2 = temp[(temp['year'] >= 1990) & (temp['year'] < 2020)]

# Both gases share the primary (left) y-axis; the anomaly uses the secondary one.
fig2 = make_subplots(specs=[[{'secondary_y': True}]])
fig2.add_trace(
    go.Scatter(x=emission_yearly_2['year'], y=emission_yearly_2['methane'], mode='lines+markers', name='Methane'),
    secondary_y=False,
)
fig2.add_trace(
    go.Scatter(x=emission_yearly_2['year'], y=emission_yearly_2['nitrous_oxide'], mode='lines+markers', name='N2O'),
    secondary_y=False,
)
fig2.add_trace(
    go.Scatter(x=temp_2['year'], y=temp_2['anomaly'], mode='lines+markers', name='Temperature'),
    secondary_y=True,
)

# Title corrected to "N2O" (letter O), matching the trace name above.
# NOTE(review): "Average" vs. the summed values in emission_yearly - confirm wording.
fig2.update_layout(title_text='Average GHG emissions (N2O, CH4) & Temperature Anomaly')
fig2.update_xaxes(title_text='Year')
fig2.update_yaxes(title_text='GHG emissions in million tonnes of Co2-equivalents', secondary_y=False)
# Axis label spelling corrected ("Celicious" -> "Celsius").
fig2.update_yaxes(title_text='Temperature Anomaly in Celsius', secondary_y=True)
fig2.update_layout(legend=dict(x=0.05, y=0.95))

# The output file name is kept unchanged so existing links to it keep working.
pio.write_html(fig2, file='N20_CH4 & anomaly.html', auto_open=True)
fig2.show()

In [None]:
# Preview the first rows of the wide per-country CO2 table (one column per year).
co2_data.head(n=5)

Unnamed: 0,country,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014
0,Afghanistan,,,,,,,,,,...,1330.0,1650.0,2270.0,4210.0,6770.0,8460.0,12200.0,10800.0,10000.0,9810.0
1,Albania,,,,,,,,,,...,4250.0,3900.0,3930.0,4370.0,4380.0,4600.0,5240.0,4910.0,5060.0,5720.0
2,Algeria,,,,,,,,,,...,107000.0,101000.0,109000.0,110000.0,121000.0,119000.0,121000.0,130000.0,134000.0,145000.0
3,Andorra,,,,,,,,,,...,576.0,546.0,539.0,539.0,517.0,517.0,491.0,488.0,477.0,462.0
4,Angola,,,,,,,,,,...,19200.0,22300.0,25200.0,25700.0,27800.0,29100.0,30300.0,33400.0,32600.0,34800.0


In [None]:
# CO2 emissions per country in long format for the map:
# one row per (country, year), integer years, 1950 onwards.
melt_co2 = (
    co2_data
    .melt(id_vars=['country'], var_name='year', value_name='value')
    .astype({'year': int})
    .loc[lambda long_frame: long_frame['year'] >= 1950]
)
melt_co2.head()

Unnamed: 0,country,year,value
38208,Afghanistan,1950,84.3
38209,Albania,1950,297.0
38210,Algeria,1950,3790.0
38211,Andorra,1950,
38212,Angola,1950,187.0


In [None]:
# Animated world map of the per-country values, one animation frame per year.
choropleth_options = dict(
    locations='country',
    locationmode='country names',
    animation_frame='year',
    animation_group='country',
    color='value',
    color_continuous_scale='blues',
    hover_name='country',
    title='Global CO2 Emission',
)
fig3 = px.choropleth(melt_co2, **choropleth_options)

# Save a standalone HTML copy, then render inline.
pio.write_html(fig3, file='Co2_map.html', auto_open=True)
fig3.show()

In [None]:
# Mauna Loa monthly emission data
co2_df = pd.read_csv('co2_mm_mlo.csv')

# Work on a copy: a plain `df = co2_df` would only alias the same object, so
# columns later added to `df` would silently appear in the raw table as well.
df = co2_df.copy()

In [None]:
# Preview the first rows of the monthly Mauna Loa table.
co2_df.head(n=5)

Unnamed: 0,year,month,decimal date,average,deseasonalized,ndays,sdev,unc
0,1958,3,1958.2027,315.7,314.43,-1,-9.99,-0.99
1,1958,4,1958.2877,317.45,315.16,-1,-9.99,-0.99
2,1958,5,1958.3699,317.51,314.71,-1,-9.99,-0.99
3,1958,6,1958.4548,317.24,315.14,-1,-9.99,-0.99
4,1958,7,1958.537,315.86,315.18,-1,-9.99,-0.99


In [None]:
# Plot the monthly averages against the decimal date.
line_options = dict(
    x='decimal date',
    y='average',
    color_discrete_sequence=['blueviolet'],
    markers=True,
    hover_data=['year', 'month', 'average'],
    title='CO2 Parts Per Million',
    labels={'decimal date':'year', 'average':'CO2 ppm'},
)
fig4 = px.line(df, **line_options)

# Save a standalone HTML copy, then render inline.
pio.write_html(fig4, file='Co2_seasonal.html', auto_open=True)
fig4.show()

In [None]:
# Build a datetime column from 'year' and 'month', pinning every row to day 1.
date_parts = df[['year', 'month']].assign(day=1)
df['datetime'] = pd.to_datetime(date_parts)

# Time series of the 'average' column, indexed by the new datetime column.
series = TimeSeries.from_dataframe(df, time_col='datetime', value_cols='average')
series.head()

In [None]:
# Shared state for the forecasting cells below.
# `metrics` collects one row of scores per model.
metrics = pd.DataFrame()

# First timestamp passed as `start` to the historical forecasts. Written as an
# explicit ISO date: the previous literal '010116' was an ambiguous 6-digit
# string whose meaning depended on the date parser's guess.
start = pd.Timestamp('2016-01-01')

def forecast_plot(series, forecast, model_name, n_points=120):
    """Plot the tail of the actual series against a forecast and save it as HTML.

    Parameters
    ----------
    series, forecast
        Objects exposing ``pd_dataframe()`` whose resulting frame has an
        'average' column (darts ``TimeSeries`` objects in this notebook).
    model_name : str
        Used as the figure title and as the stem of the HTML file written to
        the working directory (``f'{model_name}.html'``).
    n_points : int, default 120
        Maximum number of trailing rows of each frame that are drawn.

    Returns
    -------
    The plotly figure. It has already been written to disk and shown, so
    callers in a notebook may want to bind the result to a name to avoid a
    second rendering when the call is the last expression of a cell.
    """
    # Only the most recent rows are drawn so the forecast stays readable.
    actual = series.pd_dataframe().tail(n_points)
    predicted = forecast.pd_dataframe().tail(n_points)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=actual.index, y=actual['average'], mode='lines', name='Actual Values'))
    fig.add_trace(go.Scatter(x=predicted.index, y=predicted['average'], mode='lines', name='Forecast'))
    fig.update_layout(title=model_name)
    fig.update_xaxes(title_text= 'Year')
    fig.update_yaxes(title_text='CO2 mole fraction (ppm)')
    fig.update_layout(legend=dict(x=0.05,y=0.9))

    # Side effects: write a standalone HTML file, then render inline.
    pio.write_html(fig, file=f'{model_name}.html', auto_open=True)
    fig.show()

    return fig
    
def print_metrics(series, forecast, model_name):
    """Score a forecast against the actual series.

    Despite its name this function prints nothing: it returns a one-row
    DataFrame, indexed by ``model_name``, with the columns MAE, RMSE, MAPE,
    SMAPE, RMSLE and R2, each rounded to two decimals.
    """
    # Column label -> metric function; evaluated in this order.
    scorers = {
        'MAE': mae,
        'RMSE': rmse,
        'MAPE': mape,
        'SMAPE': smape,
        'RMSLE': rmsle,
        'R2': r2_score,
    }
    scores = {label: scorer(series, forecast) for label, scorer in scorers.items()}

    return pd.DataFrame(scores, index=[model_name]).round(decimals=2)

In [None]:
model = NaiveSeasonal(K=12)
model_name = 'Naive Seasonal'

# Backtest: repeated 12-step-ahead forecasts beginning at `start`.
forecast = model.historical_forecasts(series, start=start, forecast_horizon=12, verbose=True)
forecast_plot(series, forecast, model_name)

naive = print_metrics(series, forecast, model_name)
# DataFrame.append is deprecated in pandas; pd.concat is its replacement.
# Dropping an existing row with the same label first keeps a re-run of this
# cell from adding a duplicate row.
metrics = pd.concat([metrics.drop(index=model_name, errors='ignore'), naive])

naive

100%|██████████| 75/75 [00:02<00:00, 31.13it/s]



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0,MAE,RMSE,MAPE,SMAPE,RMSLE,R2
Naive Seasonal,2.33,2.39,0.56,0.57,0.01,0.74


In [None]:
model = ExponentialSmoothing(seasonal_periods = 12)
model_name = 'Exponential Smoothing'

# Backtest: repeated 12-step-ahead forecasts beginning at `start`.
forecast = model.historical_forecasts(series, start=start, forecast_horizon=12, verbose=True)
forecast_plot(series, forecast, model_name)

exp_smoothing = print_metrics(series, forecast, model_name)
# DataFrame.append is deprecated in pandas; pd.concat is its replacement.
# Dropping an existing row with the same label first keeps a re-run of this
# cell from adding a duplicate row.
metrics = pd.concat([metrics.drop(index=model_name, errors='ignore'), exp_smoothing])

exp_smoothing


100%|██████████| 75/75 [00:23<00:00,  3.15it/s]



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0,MAE,RMSE,MAPE,SMAPE,RMSLE,R2
Exponential Smoothing,0.44,0.55,0.11,0.11,0.0,0.99


In [None]:
model = LinearRegressionModel(lags = 12)
model_name = 'Linear Regression'

# Backtest: repeated 12-step-ahead forecasts beginning at `start`.
forecast = model.historical_forecasts(series, start=start, forecast_horizon=12, verbose=True)
forecast_plot(series, forecast, model_name)

lr = print_metrics(series, forecast, model_name)
# DataFrame.append is deprecated in pandas; pd.concat is its replacement.
# Dropping an existing row with the same label first keeps a re-run of this
# cell from adding a duplicate row.
metrics = pd.concat([metrics.drop(index=model_name, errors='ignore'), lr])

lr

100%|██████████| 75/75 [00:03<00:00, 22.87it/s]



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0,MAE,RMSE,MAPE,SMAPE,RMSLE,R2
Linear Regression,0.48,0.62,0.12,0.12,0.0,0.98


In [None]:
model = Prophet()
model_name = 'Prophet'

# Backtest: repeated 12-step-ahead forecasts beginning at `start`.
forecast = model.historical_forecasts(series, start=start, forecast_horizon=12, verbose=True)
forecast_plot(series, forecast, model_name)

prophet = print_metrics(series, forecast, model_name)
# DataFrame.append is deprecated in pandas; pd.concat is its replacement.
# Dropping an existing row with the same label first keeps a re-run of this
# cell from adding a duplicate row.
metrics = pd.concat([metrics.drop(index=model_name, errors='ignore'), prophet])

prophet

  0%|          | 0/75 [00:00<?, ?it/s]INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpamwh_514/2x_u06g8.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpamwh_514/cwa8jd3i.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.9/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=91640', 'data', 'file=/tmp/tmpamwh_514/2x_u06g8.json', 'init=/tmp/tmpamwh_514/cwa8jd3i.json', 'output', 'file=/tmp/tmpamwh_514/prophet_modelsx8hsi9g/prophet_model-20230316212517.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
21:25:17 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
21:25:18 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
  


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0,MAE,RMSE,MAPE,SMAPE,RMSLE,R2
Prophet,0.96,1.11,0.23,0.23,0.0,0.94


In [None]:
# Comparison table of all models; as the last expression of the cell it is
# rendered by the notebook as a formatted table instead of plain text.
metrics

                        MAE  RMSE  MAPE  SMAPE  RMSLE    R2
Naive Seasonal         2.33  2.39  0.56   0.57   0.01  0.74
Exponential Smoothing  0.44  0.55  0.11   0.11   0.00  0.99
Linear Regression      0.48  0.62  0.12   0.12   0.00  0.98
Prophet                0.96  1.11  0.23   0.23   0.00  0.94


In [None]:
model = ExponentialSmoothing(seasonal_periods = 12)
model_name = 'Exponential Smoothing 2 Year Forecast'

# Fit on the full series and predict the next 24 steps.
model.fit(series)
forecast = model.predict(24)

# forecast_plot() already shows the figure and also returns it; binding the
# result to a name keeps the notebook from rendering the same figure a second
# time as the cell's output value.
forecast_fig = forecast_plot(series, forecast, model_name)