In [2]:
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split 

In [10]:
# Quandl dataset code of the stock we want to observe and forecast
STOCK = "wIKI/FB"
# Fetch that dataset from Quandl and preview its first five rows
df = quandl.get(STOCK)
df.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2012-05-18,42.05,45.0,38.0,38.2318,573576400.0,0.0,1.0,42.05,45.0,38.0,38.2318,573576400.0
2012-05-21,36.53,36.66,33.0,34.03,168192700.0,0.0,1.0,36.53,36.66,33.0,34.03,168192700.0
2012-05-22,32.61,33.59,30.94,31.0,101786600.0,0.0,1.0,32.61,33.59,30.94,31.0,101786600.0
2012-05-23,31.37,32.5,31.36,32.0,73600000.0,0.0,1.0,31.37,32.5,31.36,32.0,73600000.0
2012-05-24,32.95,33.21,31.77,33.03,50237200.0,0.0,1.0,32.95,33.21,31.77,33.03,50237200.0


In [11]:
# Keep only the adjusted close price.
# .copy() makes the result an independent DataFrame, so adding columns to it
# later does not raise pandas' SettingWithCopyWarning.
df = df[["Adj. Close"]].copy()
df.head()

Unnamed: 0_level_0,Adj. Close
Date,Unnamed: 1_level_1
2012-05-18,38.2318
2012-05-21,34.03
2012-05-22,31.0
2012-05-23,32.0
2012-05-24,33.03


In [16]:
# How far into the future we want to forecast the stock, counted in rows
# of df (one row per trading day)
forecast_out = 30

# Create the target column: the adjusted close price forecast_out rows later.
# The last forecast_out rows have no future price, so their Target is NaN.
# assign() builds a new DataFrame instead of writing into a sliced one, which
# keeps the cell re-runnable and free of SettingWithCopyWarning.
df = df.assign(Target=df['Adj. Close'].shift(-forecast_out))

df.head()

Unnamed: 0_level_0,Adj. Close,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-05-18,38.2318,30.771
2012-05-21,34.03,31.2
2012-05-22,31.0,31.47
2012-05-23,32.0,31.73
2012-05-24,33.03,32.17


In [17]:
# Create the independent data set (X): every column except the target,
# converted to a 2-D numpy array.
# `columns=` is used because passing the axis positionally to DataFrame.drop
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 on.
X = np.array(df.drop(columns=['Target']))

# Remove the last forecast_out rows: their Target is NaN after the shift,
# so they cannot be used for training or scoring
X = X[:-forecast_out]
print(X)

[[ 38.2318]
 [ 34.03  ]
 [ 31.    ]
 ...
 [171.5499]
 [175.98  ]
 [176.41  ]]


In [19]:
# Dependent variable (y): the Target column as a numpy array, leaving out
# its last forecast_out entries
y = np.array(df['Target'])[:-forecast_out]
print(y)

[ 30.771  31.2    31.47  ... 159.39  160.06  152.19 ]


In [20]:
# Split data into 80% train and 20% test.
# random_state pins the shuffle so the split, and every score computed from
# it, is the same on each run.
# NOTE(review): the rows are time-ordered and this split shuffles them;
# consider a chronological split (shuffle=False) if the scores are meant to
# estimate real forecasting accuracy.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create and train the Support Vector Regression model (RBF kernel)
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_rbf.fit(x_train, y_train)



SVR(C=1000.0, gamma=0.1)

In [22]:
# Evaluate the SVR on the test split; score() returns the R^2
# coefficient of determination of the prediction
svm_score = svr_rbf.score(X=x_test, y=y_test)
print('Score is ', svm_score)


Score is  0.9832472704883634


In [23]:
# Build the linear regression model and fit it on the training split
# (fit() returns the estimator itself, so it can be chained)
lr = LinearRegression().fit(x_train, y_train)
lr


LinearRegression()

In [24]:
# Score the linear regression on the test split
lr_score = lr.score(X=x_test, y=y_test)
print('Score is ', lr_score)

Score is  0.979298861392639


In [25]:
# Set x_forecast to the last forecast_out rows of the feature columns
# (everything except Target); these are the rows whose Target is NaN.
# `columns=` is used because passing the axis positionally to DataFrame.drop
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 on.
x_forecast = np.array(df.drop(columns=['Target']))[-forecast_out:]
print(x_forecast)

[[173.15]
 [179.52]
 [179.96]
 [177.36]
 [176.01]
 [177.91]
 [178.99]
 [183.29]
 [184.93]
 [181.46]
 [178.32]
 [175.94]
 [176.62]
 [180.4 ]
 [179.78]
 [183.71]
 [182.34]
 [185.23]
 [184.76]
 [181.88]
 [184.19]
 [183.86]
 [185.09]
 [172.56]
 [168.15]
 [169.39]
 [164.89]
 [159.39]
 [160.06]
 [152.19]]


In [27]:
# Predict the target for each of the x_forecast rows with both models
lr_pred = lr.predict(x_forecast)
svr_pred = svr_rbf.predict(x_forecast)

# Show the linear regression predictions first, then the SVR predictions
print(lr_pred)
print(svr_pred)

[177.14124584 183.57905653 184.02374048 181.39606264 180.03169146
 181.95191757 183.04341451 187.3891894  189.04664772 185.53970846
 182.36628215 179.96094628 180.6481851  184.46842442 183.84182432
 187.81366043 186.42907634 189.34984132 188.87483802 185.96417949
 188.29877019 187.96525723 189.20835097 176.5449651  172.08801923
 173.34121943 168.79331548 163.23476622 163.91189858 155.95811991]
[174.79450754 179.37369921 178.28846123 178.69405041 177.05308153
 180.04497104 180.61558936 186.00590396 174.53578332 182.27433094
 180.77500615 177.01999241 177.42598606 177.94407005 178.68450381
 182.78108541 187.42956446 174.51758194 174.99886458 185.10087036
 178.56562366 181.45320783 174.39873071 176.59050455 171.3202189
 173.22848544 172.19832784 167.66852092 166.19084397 157.6397945 ]
