# Time Series Machine Learning Part 1 Assignment

In [0]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

### Import the Netflix stock price data set (NFLX_data.csv).

In [0]:
data = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%205/NFLX_data.csv')
data['date'] = pd.to_datetime(data['date'])
data.columns = ['Date', 'Open', 'High', 'Low', 'Observed', 'Volume', 'Ticker']
data.head()

Unnamed: 0,Date,Open,High,Low,Observed,Volume,Ticker
0,2013-02-08,25.9635,26.28,25.7157,25.8528,25649820,NFLX
1,2013-02-11,25.5685,26.0071,24.9714,25.4128,29321782,NFLX
2,2013-02-12,25.8085,26.2228,25.1014,25.4214,34388802,NFLX
3,2013-02-13,25.8428,26.6285,25.6657,26.6098,40799094,NFLX
4,2013-02-14,26.7557,27.1214,26.3844,26.7714,31968685,NFLX


### Transform the data by shifting the series and creating features that will allow us to forecast the price 30 days into the future from 90 days of daily history.

In [0]:
data = data[['Date', 'Observed']]
history = 90
future = 30

shifts = [x + future for x in list(range(1, history + 1))]

for shift in shifts:
    data['t-' + str(shift)] = data['Observed'].shift(shift)

data.dropna(inplace=True)
data.head()

Unnamed: 0,Date,Observed,t-31,t-32,t-33,t-34,t-35,t-36,t-37,t-38,t-39,t-40,t-41,t-42,t-43,t-44,t-45,t-46,t-47,t-48,t-49,t-50,t-51,t-52,t-53,t-54,t-55,t-56,t-57,t-58,t-59,t-60,t-61,t-62,t-63,t-64,t-65,t-66,t-67,t-68,...,t-81,t-82,t-83,t-84,t-85,t-86,t-87,t-88,t-89,t-90,t-91,t-92,t-93,t-94,t-95,t-96,t-97,t-98,t-99,t-100,t-101,t-102,t-103,t-104,t-105,t-106,t-107,t-108,t-109,t-110,t-111,t-112,t-113,t-114,t-115,t-116,t-117,t-118,t-119,t-120
120,2013-08-01,35.5885,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,30.7928,30.5357,...,23.2943,23.5228,23.8128,24.2485,25.2414,26.0614,27.04,27.1771,27.23,25.8271,25.9,25.9985,26.15,25.91,26.5128,26.4071,26.91,27.48,26.0157,25.7785,26.3857,25.9371,26.1343,25.9614,25.8871,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214,25.4128,25.8528
121,2013-08-02,35.1685,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,30.7928,...,24.1943,23.2943,23.5228,23.8128,24.2485,25.2414,26.0614,27.04,27.1771,27.23,25.8271,25.9,25.9985,26.15,25.91,26.5128,26.4071,26.91,27.48,26.0157,25.7785,26.3857,25.9371,26.1343,25.9614,25.8871,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214,25.4128
122,2013-08-05,36.2628,31.9314,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,...,23.7243,24.1943,23.2943,23.5228,23.8128,24.2485,25.2414,26.0614,27.04,27.1771,27.23,25.8271,25.9,25.9985,26.15,25.91,26.5128,26.4071,26.91,27.48,26.0157,25.7785,26.3857,25.9371,26.1343,25.9614,25.8871,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214
123,2013-08-06,36.5571,30.9857,31.9314,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,...,24.7157,23.7243,24.1943,23.2943,23.5228,23.8128,24.2485,25.2414,26.0614,27.04,27.1771,27.23,25.8271,25.9,25.9985,26.15,25.91,26.5128,26.4071,26.91,27.48,26.0157,25.7785,26.3857,25.9371,26.1343,25.9614,25.8871,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098
124,2013-08-07,35.6014,30.8,30.9857,31.9314,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,...,24.7428,24.7157,23.7243,24.1943,23.2943,23.5228,23.8128,24.2485,25.2414,26.0614,27.04,27.1771,27.23,25.8271,25.9,25.9985,26.15,25.91,26.5128,26.4071,26.91,27.48,26.0157,25.7785,26.3857,25.9371,26.1343,25.9614,25.8871,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714


### Split the data into a training set and a testing set. Make the test set size 20%.

In [0]:
x = data.drop(['Date', 'Observed'], axis=1)
y = data['Observed']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

### Instantiate an AdaBoost model and fit it to the training set.

In [0]:
model = AdaBoostRegressor()
model.fit(x_train, y_train)

AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
                  n_estimators=50, random_state=None)

### Generate predictions for the test set.

In [0]:
predictions = model.predict(x_test)

### Evaluate the results using R-Squared, Mean Absolute Error, and Root Mean Squared Error metrics.

In [0]:
print('R-Squared:', r2_score(y_test, predictions))
print('Mean Absolute Error:', mean_absolute_error(y_test, predictions))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, predictions)))

R-Squared: 0.9582239860871545
Mean Absolute Error: 8.222435206607573
Root Mean Squared Error: 9.75301246167116


### Visually examine the results by creating a scatter plot where the x axis represents the observed results and the y axis represents the predictions.

In [0]:
def iscatter(df, x, y, color=None, size=None, title=''):
    fig = px.scatter(df, x=x, y=y, color=color, size=size, 
                     title=title, template='none')
    
    fig.update_traces(marker_line_color='black', 
                  marker_line_width=1)

    fig.show()
    
results = pd.DataFrame([list(y_test), predictions], index=['Observed', 'Predicted']).T
iscatter(results, 'Observed', 'Predicted', title='Observed vs. Predicted')