# Read & Plot Time Series Data

In [None]:
import pandas as pd
import datetime as dt
import numpy as np
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6
from patsy import dmatrices
from pandas import Series
from matplotlib import pyplot
from datetime import datetime
from matplotlib.pyplot import figure
figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')

# Read yearly time series: USGDP

In [None]:
# Load the yearly US GDP series; header=0 takes the column names from the first row.
USGDP = pd.read_csv('GDPUS.csv', header=0)
# Preview the first eight rows.
USGDP.head(8)

In [None]:
# Preview the last five rows.
USGDP.tail(5)

In [None]:
# One year-end timestamp per calendar year, 1929 through 1991.
# ISO dates remove the day-first/month-first ambiguity of strings such as
# '31/12/1991'; the explicit offset object stands in for the 'Y' alias,
# which recent pandas releases deprecate, and yields the same dates.
date_rng = pd.date_range(start='1929-01-01', end='1991-12-31',
                         freq=pd.offsets.YearEnd(month=12))
date_rng

In [None]:
USGDP['TimeIndex'] = pd.DataFrame(date_rng, columns=['Year'])
USGDP.head(8)

In [None]:
plt.plot(USGDP.TimeIndex, USGDP.GDP)
plt.legend(loc='best')
pyplot.show()

# Task 1: Read GDP India series

# Task 2: Read English Daily Wage series

# Read Quarterly Time Series: Retail Turnover

In [None]:
# Load the quarterly retail turnover series from the working directory.
Turnover = pd.read_csv('RetailTurnover.csv')
#Turnover.head()

In [None]:
from datetime import datetime
# Quarter-end timestamps from 1982 Q3 through 1992 Q1.
# The previous start string '1/7/1982' is read month-first by pandas
# (7 January 1982), whereas the end string '31/3/1992' can only be read
# day-first. ISO dates make both ends unambiguous.
# NOTE(review): assumes the series begins in 1982 Q3 (day-first reading,
# matching the end date) -- confirm against RetailTurnover.csv.
# The offset object produces the same quarter ends as the 'Q' alias, which
# recent pandas releases deprecate.
date_rng = pd.date_range(start='1982-07-01', end='1992-03-31',
                         freq=pd.offsets.QuarterEnd(startingMonth=12))

In [None]:
date_rng

In [None]:
Turnover['TimeIndex'] = pd.DataFrame(date_rng, columns=['Quarter'])
Turnover.head()

In [None]:
plt.plot(Turnover.TimeIndex, Turnover.Turnover)
plt.legend(loc='best')
pyplot.show()

# Task 3: Read Portugal Port Series

# Task 4: Read Petrol Consumption Series

# Read Monthly Time Series: Average Monthly Temperature

In [None]:
# Load the monthly average temperature series from the working directory.
AirTemp = pd.read_csv('AirTemp.csv')
#AirTemp.head()

In [None]:
# One month-end timestamp per month, January 1920 through December 1939.
# ISO dates remove the day-first/month-first ambiguity of '31/12/1939';
# the offset object yields the same dates as the 'M' alias, which recent
# pandas releases deprecate.
date_rng = pd.date_range(start='1920-01-01', end='1939-12-31',
                         freq=pd.offsets.MonthEnd())
#date_rng

In [None]:
AirTemp['TimeIndex'] = pd.DataFrame(date_rng, columns=['Month'])
AirTemp.head()

In [None]:
plt.plot(AirTemp.TimeIndex, AirTemp.AvgTemp)
plt.legend(loc='best')
pyplot.show()

# Task 5: Read Champagne Series

# Task 6: Read International Air Passengers Data

# Task 7: Read Tractor Sales Series

In [None]:
Tractor = pd.read_csv("TractorSales.csv")
Tractor.head(5)

In [None]:
dates = pd.date_range(start='2003-01-01', freq='MS', periods=len(Tractor))

In [None]:
import calendar
Tractor['Month'] = dates.month
Tractor['Month'] = Tractor['Month'].apply(lambda x: calendar.month_abbr[x])
Tractor['Year'] = dates.year

In [None]:
#Tractor.drop(['Month-Year'], axis=1, inplace=True)
Tractor.rename(columns={'Number of Tractor Sold':'Tractor-Sales'}, inplace=True)
Tractor = Tractor[['Month', 'Year', 'Tractor-Sales']]

In [None]:
Tractor.set_index(dates, inplace=True)

In [None]:
Tractor.head(5)

In [None]:
# extract out the time-series
sales_ts = Tractor['Tractor-Sales']

In [None]:
plt.figure(figsize=(8, 4))
plt.plot(sales_ts)
plt.xlabel('Years')
plt.ylabel('Tractor Sales')

# Task 8: Read Environmental CO2 Emission Series

# Task 9: Reading Retail Food Sales Series

# Task 10: Reading Mauna Loa Emission Series

# Reading Weekly Time Series: Dow Jones Closing

In [None]:
DJI = pd.read_csv("WeeklyClosing.csv")
DJI.head(5)

In [None]:
date_rng = pd.date_range(start='7/4/1971', end='8/5/1974', freq='W')
DJI['TimeIndex'] = pd.DataFrame(date_rng, columns=['Week'])
DJI.rename(columns={'DJIA Closing':'Closing'}, inplace=True)
DJI.head()

In [None]:
plt.plot(DJI.TimeIndex, DJI.Closing)
plt.legend(loc='best')
pyplot.show()

# Task 11: Reading Gasoline Price Data

# Read Week-Day Time Series: Crude Oil Production

In [None]:
# Load the week-day crude oil series from the working directory and preview it.
CrdOil = pd.read_csv('CrudeOil.csv')
CrdOil.head()

In [None]:
# One timestamp per business day (Mon-Fri), 4 April 1983 to 27 November 2012.
# ISO dates replace the mixed '4/4/1983' / '27/11/2012' notation, where
# only the second string forces a day-first reading.
date_rng = pd.date_range(start='1983-04-04', end='2012-11-27', freq='B')
#date_rng

In [None]:
CrdOil['TimeIndex'] = pd.DataFrame(date_rng, columns=['Weekday'])
CrdOil.head()

In [None]:
plt.plot(CrdOil.TimeIndex, CrdOil.Price)
plt.legend(loc='best')
pyplot.show()

# Simple Forecasts: Average and Moving Average

# AirTemp, US GDP, India GDP, Eng Daily Wage

In [None]:
temp_avg = AirTemp.copy()
temp_avg['avg_forecast'] = AirTemp['AvgTemp'].mean()

In [None]:
plt.figure(figsize=(12,8))
plt.plot(AirTemp['AvgTemp'], label='Data')
plt.plot(temp_avg['avg_forecast'], label='Average Forecast')
plt.legend(loc='best')

In [None]:
mvg_avg = AirTemp.copy()
mvg_avg['moving_avg_forecast'] = AirTemp['AvgTemp'].rolling(12).mean()
#mvg_avg.head(15)
#mvg_avg.tail(6)

In [None]:
#plt.figure(figsize=(16,8))
plt.plot(AirTemp['AvgTemp'], label='Average Temperature')
plt.plot(mvg_avg['moving_avg_forecast'], label='Moving Average Forecast')
plt.legend(loc='best')

# Task 1: MA(5) for US GDP

# Task 2: MA(3) for India GDP

# Task 3: MA(7) for Daily Wage

# MA Forecast for Crude Oil

In [None]:
mvg_avg_oil1 = CrdOil.copy()
mvg_avg_oil2 = CrdOil.copy()
mvg_avg_oil3 = CrdOil.copy()

mvg_avg_oil1['moving_avg_forecast'] = CrdOil['Price'].rolling(300).mean()
mvg_avg_oil2['moving_avg_forecast'] = CrdOil['Price'].rolling(900).mean()
mvg_avg_oil3['moving_avg_forecast'] = CrdOil['Price'].rolling(1500).mean()

In [None]:
plt.figure(figsize=(16,8))
plt.plot(CrdOil['Price'], label='Crude Oil Price')
plt.plot(mvg_avg_oil1['moving_avg_forecast'], label='MA(300)')
plt.plot(mvg_avg_oil2['moving_avg_forecast'], label='MA(900)')
plt.plot(mvg_avg_oil3['moving_avg_forecast'], label='MA(1500)')
plt.legend(loc='upper left')

# Visualization of Seasonality: Quarterly Turnover

In [None]:
quarterly_turnover = pd.pivot_table(Turnover, values = "Turnover", columns = "Quarter", index = "Year")
quarterly_turnover

In [None]:
quarterly_turnover.plot()

In [None]:
quarterly_turnover.boxplot()

# Task 1: Seasonality of Portugal Port

# Task 2: Seasonality of Petrol Consumption

# Visualization of Seasonality: Monthly Temperature

In [None]:
monthly_temp = pd.pivot_table(AirTemp, values = "AvgTemp", columns = "Month", index = "Year")
monthly_temp = monthly_temp[['Jan','Feb','Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']]
monthly_temp

In [None]:
monthly_temp.plot()

In [None]:
monthly_temp.boxplot()

# Task 3: Seasonality of Champagne Sales

# Task 4: Seasonality of Air Passengers

# Task 5: Seasonality of Tractor Sales

# Task 6: Seasonality of CO2 Emission

# Task 7: Seasonality of Retail Food Sales

# Task 8: Seasonality of MaunaLoa

# Seasonal Decomposition

In [None]:
import statsmodels.api as sm

In [None]:
decompTurnover = sm.tsa.seasonal_decompose(Turnover.Turnover, model="additive", freq=4)
decompTurnover.plot()
plt.show()

In [None]:
trend = decompTurnover.trend
seasonal = decompTurnover.seasonal
residual = decompTurnover.resid

In [None]:
trend.head(12)

In [None]:
seasonal.head(12)

In [None]:
residual.head(12)

# Split Data in Training and Test

In [None]:
Petrol = pd.read_csv('Petrol.csv')
Petrol.head()

In [None]:
date_rng = pd.date_range(start='1/1/2001', end='30/9/2013', freq='Q')
date_rng
Petrol['TimeIndex'] = pd.DataFrame(date_rng, columns=['Quarter'])
Petrol.head()
plt.plot(Petrol.TimeIndex, Petrol.Consumption)
pyplot.show()

In [None]:
#Creating train and test set 
train=Petrol[0:int(len(Petrol)*0.7)] 
test=Petrol[int(len(Petrol)*0.7):]

In [None]:
type(train)
train.head()

In [None]:
plt.plot(train.Consumption)
plt.plot(test.Consumption)
pyplot.show()

In [None]:
train.Consumption.plot()
test.Consumption.plot()

# Simple Exponential Smoothing

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing, Holt

In [None]:
# create class
model = SimpleExpSmoothing(np.asarray(train['Consumption']))

In [None]:
# fit model
model99_fit = model.fit(smoothing_level=0.99,optimized=False)
model5_fit = model.fit(smoothing_level=0.5,optimized=False)
model1_fit = model.fit(smoothing_level=0.1,optimized=False)

In [None]:
fcst_SES = test.copy()

In [None]:
fcst_SES['SES99'] = model99_fit.forecast(len(test['Consumption']))
fcst_SES['SES5'] = model5_fit.forecast(len(test['Consumption']))
fcst_SES['SES1'] = model1_fit.forecast(len(test['Consumption']))

In [None]:
plt.figure(figsize=(16,8))
plt.plot(train['Consumption'], label='Train')
plt.plot(test['Consumption'], label='Test')
plt.plot(fcst_SES['SES99'], label='SES99')
plt.plot(fcst_SES['SES5'], label='SES5')
plt.plot(fcst_SES['SES1'], label='SES1')
plt.legend(loc='best')

In [None]:
fcst_SES['SES99']

In [None]:
# Forecast accuracy metric used for the smoothing models in this notebook.
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Computes ``mean(|y_true - y_pred| / |y_true|) * 100``.

    Parameters
    ----------
    y_true : array-like
        Observed values. A zero entry makes the result infinite or NaN,
        because each error is divided by the corresponding observation.
    y_pred : array-like or scalar
        Predicted values with the same shape as ``y_true``, or a single
        scalar applied to every observation.

    Returns
    -------
    float
        The MAPE expressed as a percentage.

    Raises
    ------
    ValueError
        If both inputs are arrays and their shapes differ.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Without this check, shapes such as (n,) and (n, 1) would broadcast to
    # an (n, n) matrix and silently produce a meaningless average.
    if y_true.ndim and y_pred.ndim and y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got "
            f"{y_true.shape} and {y_pred.shape}"
        )
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


In [None]:
mean_absolute_percentage_error(test['Consumption'],fcst_SES['SES99'])

In [None]:
rmse = sqrt(mean_squared_error(test['Consumption'],fcst_SES['SES99']))
rmse

In [None]:
model_opt_fit = model.fit()

In [None]:
model_opt_fit.model.params

# Double Exponential (Holt)

In [None]:
model = Holt(np.asarray(train['Consumption']))
model31_fit = model.fit(smoothing_level = 0.3,smoothing_slope = 0.1,optimized=False)

In [None]:
model31_fit = model.fit(smoothing_level = 0.3,smoothing_slope = 0.1,optimized=False)

In [None]:
fcst_Holt = test.copy()

In [None]:
fcst_Holt['Holt31'] = model31_fit.forecast(len(test['Consumption']))

In [None]:
plt.figure(figsize=(16,8))
plt.plot(train['Consumption'], label='Train')
plt.plot(test['Consumption'], label='Test')
plt.plot(fcst_Holt['Holt31'], label='Holt31')
plt.legend(loc='best')

In [None]:
modelopt_fit = model.fit()

In [None]:
fcst_Holt['Opt'] = modelopt_fit.forecast(len(test['Consumption']))

In [None]:
plt.figure(figsize=(16,8))
plt.plot(train['Consumption'], label='Train')
plt.plot(test['Consumption'], label='Test')
plt.plot(fcst_Holt['Opt'], label='HoltOpt')
plt.legend(loc='best')

In [None]:
modelopt_fit.model.params