# Time Series Decomposition & Analysis Assignment

In [None]:
import pandas as pd
import statsmodels.api as sm
import plotly.express as px

### Import the monthly sunspots data set into a Pandas dataframe, and convert the Month field to a datetime data type.

In [None]:
# Download the monthly sunspots CSV from its hosted URL and load it into a DataFrame.
data = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%203/monthly-sunspots.csv')

In [None]:
# Replace the 'Month' column with its values parsed by pandas as datetimes.
data['Month'] = pd.to_datetime(data['Month'])

### Use a line chart to plot the time series.

In [None]:
def ilinechart(df, x, y, groups=None, title=''):
    """Draw a Plotly Express line chart and display it.

    Args:
        df: Data handed to ``px.line``.
        x: Column plotted along the horizontal axis.
        y: Column plotted along the vertical axis.
        groups: Optional column passed as ``color`` to split the data
            into separate lines.
        title: Chart title; positioned at x=0.5 of the figure.
    """
    title_layout = {'title': {'x': 0.5}}

    fig = px.line(df, x=x, y=y, color=groups, title=title, template='none')
    fig.update(layout=title_layout)
    fig.show()

In [None]:
# Plot the raw monthly sunspot counts against time.
ilinechart(df=data, x='Month', y='Sunspots', title='Sunspots by Month')

### Decompose the time series and add columns for the trend, seasonality, and residuals to the data set.

In [None]:
# Index the observations by month before handing them to the decomposition.
series = data.set_index('Month')
decomposition = sm.tsa.seasonal_decompose(series, model='additive')

# Move each component's 'Month' index back into a column so it can be joined.
trend, seasonality, residuals = (
    component.reset_index()
    for component in (decomposition.trend, decomposition.seasonal, decomposition.resid)
)

# Join the components onto the raw data one after another on 'Month'.
merged = (
    data
    .merge(trend, on='Month')
    .merge(seasonality, on='Month')
    .merge(residuals, on='Month')
)
# Positional relabel: assumes `data` holds exactly the month and one value column.
merged.columns = ['Month', 'Observed', 'Trend', 'Seasonality', 'Residuals']
merged

### Plot the observed values, trend, seasonality, and residuals on a multi-line chart. You should have a line for each column.

**Hint:** You may need to melt the data so that all the categories are in a single column and all the values are in a single column.

In [None]:
# Reshape to long format: one row per month and per series, so each series
# can be drawn as its own line.
melted = merged.melt(
    id_vars='Month',
    value_vars=['Observed', 'Trend', 'Seasonality', 'Residuals'],
)
melted

In [None]:
def multilinechart(df, x, y, groups=None, title=''):
    """Draw a multi-line Plotly Express chart with short legend labels.

    Trace names of the form ``'column=value'`` are shortened to the text
    after the first ``'='``. Names without an ``'='`` (for example when
    ``groups`` is None, or when Plotly Express already names traces by the
    bare value) are left untouched instead of raising ``IndexError``.

    Args:
        df: Data handed to ``px.line``.
        x: Column plotted along the horizontal axis.
        y: Column plotted along the vertical axis.
        groups: Optional column passed as ``color``; one line per value.
        title: Chart title; positioned at x=0.5 of the figure.
    """
    fig = px.line(df, x=x, y=y, color=groups, title=title,
                  template='none').update(layout=dict(title=dict(x=0.5)))

    for trace in fig.data:
        # partition() never fails: `separator` is empty when there is no '='.
        _, separator, label = (trace.name or '').partition('=')
        if separator:
            trace.update(name=label)

    fig.show()

In [None]:
# One line per decomposition component, taken from the long-format frame.
multilinechart(
    df=melted,
    x='Month',
    y='value',
    groups='variable',
    title='Observed vs. Components',
)

### Add two columns to the data set - one that calculates a rolling mean and another that calculates a rolling standard deviation.

In [None]:
# Both statistics are computed over the same 12-observation rolling window.
observed_windows = merged['Observed'].rolling(window=12)
merged['Rolling_Mean'] = observed_windows.mean()
merged['Rolling_Std'] = observed_windows.std()

### Plot the rolling mean and standard deviation on a multi-line chart along with the observed values.

In [None]:
# Long format again, this time for the observed values and the two rolling statistics.
rolling = merged.melt(
    id_vars='Month',
    value_vars=['Observed', 'Rolling_Mean', 'Rolling_Std'],
)

multilinechart(
    df=rolling,
    x='Month',
    y='value',
    groups='variable',
    title='Rolling Mean & Standard Deviation',
)

### Perform an Augmented Dickey-Fuller test on the data set and determine whether this time series is stationary.

In [None]:
# Run the Augmented Dickey-Fuller test on the observed series.
adf_test = sm.tsa.stattools.adfuller(merged['Observed'])

# The first four entries are scalars; the fifth maps significance level -> critical value.
summary_labels = ['ADF Test Statistic', 'P-Value', '# Lags Used', '# Obs Used']
statistic, p_value, lags_used, obs_used, critical_values = adf_test[:5]
results = pd.Series([statistic, p_value, lags_used, obs_used], index=summary_labels)

# Append one labelled row per critical value.
for level, threshold in critical_values.items():
    results[f'Critical Value ({level})'] = threshold

results

### Generate an autocorrelation plot for the data set to determine the level of autocorrelation in this time series.

In [None]:
from statsmodels.graphics.tsaplots import plot_acf
from pylab import rcParams

In [None]:
# Default matplotlib figure size (width, height) for the plots that follow.
rcParams['figure.figsize'] = (16, 8)

In [None]:
# Re-index the raw data by month, then draw its autocorrelation with `lags=300`.
series = data.set_index(keys='Month')
fig = plot_acf(series, lags=300)

In [None]:
def wma(df, field, window):
    """Return the linearly weighted moving average of one column.

    Every run of ``window`` consecutive values is averaged with weights
    1, 2, ..., ``window``, so the latest value in each window counts most.

    Args:
        df: DataFrame (or other mapping of column name to pandas Series).
        field: Name of the column to average.
        window: Number of observations per window (expected to be a
            positive integer).

    Returns:
        A Series aligned with ``df[field]``. Positions whose window is not
        yet full (the first ``window - 1``) are NaN.
    """
    # Imported here because this file does not import numpy at the top;
    # without it the function fails with NameError on first use.
    import numpy as np

    weights = np.arange(1, window + 1)
    weight_total = weights.sum()  # invariant across windows, so computed once
    windows = df[field].rolling(window)
    # raw=True passes each window as a plain ndarray, which np.dot accepts directly.
    return windows.apply(
        lambda values: np.dot(values, weights) / weight_total, raw=True
    )