# Tesla price prediction using Linear Regression, Ridge Regression, Lasso Regression and Elastic Net Regression

In [78]:
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

In [79]:
df = pd.read_csv('TSLA.csv') 

In [80]:
print(df.shape)

(2416, 6)


In [81]:
df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0
mean,186.271147,189.578224,182.916639,186.403651,186.403651,5572722.0
std,118.740163,120.892329,116.857591,119.13602,119.13602,4987809.0
min,16.139999,16.629999,14.98,15.8,15.8,118500.0
25%,34.342498,34.897501,33.587501,34.400002,34.400002,1899275.0
50%,213.035004,216.745002,208.870002,212.960007,212.960007,4578400.0
75%,266.450012,270.927513,262.102501,266.774994,266.774994,7361150.0
max,673.690002,786.140015,673.52002,780.0,780.0,47065000.0


In [82]:
df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
0,19.0,25.0,17.540001,23.889999,23.889999,18766300
1,25.790001,30.42,23.299999,23.83,23.83,17187100
2,25.0,25.92,20.27,21.959999,21.959999,8218800
3,23.0,23.1,18.709999,19.200001,19.200001,5139800
4,20.0,20.0,15.83,16.110001,16.110001,6866900


In [83]:
df.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [84]:
df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
0,19.0,25.0,17.540001,23.889999,23.889999,18766300
1,25.790001,30.42,23.299999,23.83,23.83,17187100
2,25.0,25.92,20.27,21.959999,21.959999,8218800
3,23.0,23.1,18.709999,19.200001,19.200001,5139800
4,20.0,20.0,15.83,16.110001,16.110001,6866900


In [85]:
df.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
2411    False
2412    False
2413    False
2414    False
2415    False
Length: 2416, dtype: bool

In [86]:
df.drop_duplicates(inplace=True)

In [87]:
df.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
2411    False
2412    False
2413    False
2414    False
2415    False
Length: 2416, dtype: bool

In [88]:
df.columns

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

# set features and labels

In [89]:
X=df.drop(['Close'],axis=1)
y=df['Close']

# split data into train and test

In [90]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=40)
print(X_train.shape); print(X_test.shape)

(1932, 5)
(484, 5)


# Linear Regression

In [91]:
lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [92]:
#training error and score
pred_train_lr= lr.predict(X_train)
print(np.sqrt(mean_squared_error(y_train,pred_train_lr)))
print(r2_score(y_train, pred_train_lr))

5.87470356381102e-14
1.0


In [93]:
#testing error and score
pred_test_lr= lr.predict(X_test)
print(np.sqrt(mean_squared_error(y_test,pred_test_lr))) 
print(r2_score(y_test, pred_test_lr))

5.843441767052482e-14
1.0


In [94]:
#training score
lr.score(X_train,y_train)

1.0

In [95]:
#testing score
lr.score(X_test,y_test)

1.0

In [96]:
#find prediction
predictions = lr.predict(X_test)

In [97]:
#compare actual and predicted values
comparison = pd.DataFrame({'Predicted Values':predictions,'Actual Values':y_test})

In [98]:
print(comparison.head())

      Predicted Values  Actual Values
1484        220.279999     220.279999
452          33.590000      33.590000
86           21.840000      21.840000
1686        243.690002     243.690002
772         121.699997     121.699997


# Regularized Regression

Apply regularized regression, hyper parameter tuning to decrease the error

# Ridge Regression

In [99]:
rr = Ridge(alpha=0.001)
rr.fit(X_train, y_train)

Ridge(alpha=0.001)

In [100]:
pred_train_rr= rr.predict(X_train)
print(np.sqrt(mean_squared_error(y_train,pred_train_rr)))
print(r2_score(y_train, pred_train_rr))

2.5187429785185666e-07
1.0


In [101]:
pred_test_rr= rr.predict(X_test)
print(np.sqrt(mean_squared_error(y_test,pred_test_rr))) 
print(r2_score(y_test, pred_test_rr))

2.361769100556973e-07
1.0


# Lasso regression

In [102]:
model_lasso = Lasso(alpha=0.01)
model_lasso.fit(X_train, y_train) 

Lasso(alpha=0.01)

In [103]:
pred_train_lasso= model_lasso.predict(X_train)
print(np.sqrt(mean_squared_error(y_train,pred_train_lasso)))
print(r2_score(y_train, pred_train_lasso))

1.3790029863232487
0.9998640094812384


In [104]:
pred_test_lasso= model_lasso.predict(X_test)
print(np.sqrt(mean_squared_error(y_test,pred_test_lasso))) 
print(r2_score(y_test, pred_test_lasso))

1.3754029197607927
0.9998728240269276


# Elastic Net regression

In [105]:
model_enet = ElasticNet(alpha = 0.01)
model_enet.fit(X_train, y_train) 

ElasticNet(alpha=0.01)

In [106]:
pred_train_enet= model_enet.predict(X_train)
print(np.sqrt(mean_squared_error(y_train,pred_train_enet)))
print(r2_score(y_train, pred_train_enet))

1.378612548189579
0.999864086476529


In [107]:
pred_test_enet= model_enet.predict(X_test)
print(np.sqrt(mean_squared_error(y_test,pred_test_enet)))
print(r2_score(y_test, pred_test_enet))

1.3750085195340591
0.9998728969525269
