# Stock Price Prediction

In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Preparing Data

In [2]:
def prepare_data(df,forecast_col,forecast_out,test_size):
    """Build train/test arrays and the forecast input from one price column.

    The single feature is the standardized ``forecast_col``; the target is the
    same column ``forecast_out`` rows later.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``forecast_col``. It is not modified.
    forecast_col : str
        Column used both as the feature and (shifted) as the target.
    forecast_out : int
        Number of rows ahead to predict. Must satisfy
        ``0 < forecast_out < len(df)``.
    test_size : float or int
        Forwarded unchanged to ``train_test_split``.

    Returns
    -------
    list
        ``[X_train, X_test, Y_train, Y_test, X_lately]``. ``X_lately`` holds
        the last ``forecast_out`` scaled feature rows, i.e. the rows that have
        no target yet and are meant to be passed to ``predict``.

    Raises
    ------
    ValueError
        If ``forecast_out`` is outside ``(0, len(df))``.
    """
    # Guard first: with forecast_out == 0 the slices below silently flip
    # (X[-0:] is everything, X[:-0] is empty), and negative or oversized
    # values leave features and targets with mismatched lengths.
    if not 0 < forecast_out < len(df):
        raise ValueError(
            "forecast_out must be a positive integer smaller than the number "
            "of rows in df (got %r for %d rows)" % (forecast_out, len(df))
        )

    # Target series: the value `forecast_out` rows ahead. The last
    # `forecast_out` entries have no future value and become NaN.
    label = df[forecast_col].shift(-forecast_out)

    # Feature matrix with a single column, scaled over the whole column so
    # that X_lately shares the same scale as the training rows.
    X = np.array(df[[forecast_col]])
    X = preprocessing.scale(X)

    # Rows without a known target are kept aside for forecasting ...
    X_lately = X[-forecast_out:]
    # ... and the remaining rows are used for training and testing.
    X = X[:-forecast_out]

    # Drop the trailing NaN targets by position rather than with dropna():
    # dropna() would also remove NaNs elsewhere in the column and leave y
    # shorter than (and misaligned with) X.
    y = np.array(label.iloc[:-forecast_out])

    # Single hold-out split (not cross validation); fixed seed for
    # reproducible results.
    X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=test_size, random_state=0)

    return [X_train, X_test, Y_train, Y_test, X_lately]

# Loading Data

In [3]:
# Load the price history from CSV.
# NOTE: this is a hard-coded absolute local path; point it at your own copy of prices.csv.
df = pd.read_csv("E:/Datasets/ML Datasets/prices.csv")

In [4]:
# Preview the first rows to check the columns that were read.
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1/27/2020,71.32,71.635002,71.32,71.324997,71.324997,0
1,1/28/2020,71.654999,71.654999,71.178001,71.440002,71.440002,0
2,1/29/2020,71.230103,71.425003,71.168503,71.2304,71.2304,0
3,1/30/2020,71.300003,71.711998,71.300003,71.300003,71.300003,0
4,1/31/2020,71.639999,71.639999,71.277496,71.639999,71.639999,0


In [5]:
# Summarize column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 262 entries, 0 to 261
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       262 non-null    object 
 1   Open       262 non-null    float64
 2   High       262 non-null    float64
 3   Low        262 non-null    float64
 4   Close      262 non-null    float64
 5   Adj Close  262 non-null    float64
 6   Volume     262 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 14.5+ KB


In [6]:
# Forecast settings: column to predict, how many rows ahead, and the
# fraction of samples held out for testing.
forecast_col, forecast_out, test_size = 'Close', 5, 0.2

In [7]:
# Split the data into train/test arrays and keep the unlabeled tail for forecasting.
X_train, X_test, Y_train, Y_test, X_lately = prepare_data(
    df,
    forecast_col=forecast_col,
    forecast_out=forecast_out,
    test_size=test_size,
)

In [8]:
# Initializing the Linear Regression model with its default settings
learner = LinearRegression()

In [9]:
# Training the Linear Regression model on the training split
learner.fit(X_train,Y_train)

LinearRegression()

In [10]:
# Testing the Linear Regression model on the held-out split
# (for LinearRegression, `score` returns the R^2 coefficient of determination)
score = learner.score(X_test,Y_test)

In [11]:
# Forecasted values: predictions for X_lately, the last `forecast_out`
# feature rows that prepare_data set aside because they have no target yet
forecast = learner.predict(X_lately)

In [12]:
# Creating the result dictionary (a plain Python dict, not a JSON string)
response = {}

In [13]:
# Record the test score and the forecast, then show the collected results.
response.update({'test_score': score, 'forecast_set': forecast})
print(response)

{'test_score': 0.639145178346672, 'forecast_set': array([73.37040254, 73.12634778, 73.16456803, 73.20017668, 73.1776807 ])}
