In [1]:
# import the library
import numpy as np

In [2]:
class Lasso_Regression():
    """Linear regression with an L1 (Lasso) penalty, trained by batch gradient descent.

    The model is ``y_hat = X.dot(w) + b``. Every update step moves ``w`` and
    ``b`` against the (sub)gradient of

        (1 / m) * (sum((Y - y_hat) ** 2) + lambda_parameter * sum(abs(w)))

    where ``m`` is the number of training rows. The bias ``b`` is not penalised.

    Attributes set by ``fit``:
        m, n -- number of rows / columns of the training matrix
        w    -- weight vector of shape (n,)
        b    -- scalar bias
        X, Y -- the training data as float NumPy arrays
    """

    # Initiating the hyperparameters
    def __init__(self, learning_rate, no_of_iterations, lambda_parameter):
        """Store the hyperparameters; no training happens here.

        Parameters:
            learning_rate    -- step size multiplied with each gradient
            no_of_iterations -- number of gradient-descent steps run by ``fit``
            lambda_parameter -- strength of the L1 penalty on the weights
        """
        self.learning_rate = learning_rate
        self.no_of_iterations = no_of_iterations
        self.lambda_parameter = lambda_parameter

    # Fitting the dataset to the Lasso Regression model
    def fit(self, X, Y):
        """Learn ``w`` and ``b`` from the training data.

        Parameters:
            X -- 2-D array-like of shape (m, n): m rows, n features
            Y -- array-like with m target values (shape (m,) or (m, 1))

        Raises:
            ValueError -- if X is not 2-D, has no rows, or Y does not hold
                          exactly one value per row of X
        """
        # Convert once so lists / DataFrames work and the slicing and
        # arithmetic in update_weights always operate on float ndarrays.
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float).reshape(-1)

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (rows, columns)")

        # number of Data points --> number of rows(m) & number of columns(n)
        self.m, self.n = X.shape

        if self.m == 0:
            # The gradients divide by m; an empty set would only yield NaN.
            raise ValueError("X must contain at least one row")
        if Y.shape[0] != self.m:
            raise ValueError("Y must contain one target value per row of X")

        # Start from the all-zero model
        self.w = np.zeros(self.n)
        self.b = 0

        self.X = X
        self.Y = Y

        # Implementing Gradient Descent algorithm for Optimization
        for _ in range(self.no_of_iterations):
            self.update_weights()

    # Function for updating the weight & bias value
    def update_weights(self):
        """Perform one gradient-descent step on ``w`` and ``b`` in place."""
        # Residuals of the current model on the training data
        residual = self.Y - self.predict(self.X)

        # Gradient for weight: squared-error part plus the L1 subgradient.
        # np.sign is 0 where a weight is exactly 0, so the penalty no longer
        # pushes a zero weight away from zero (it used to be treated as
        # negative, which biased such weights towards positive values).
        dw = (-2 * self.X.T.dot(residual)
              + self.lambda_parameter * np.sign(self.w)) / self.m

        # Gradient for bias (the bias is not regularised)
        db = -2 * np.sum(residual) / self.m

        # Updating the weights & bias
        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    # Predicting the Target variable
    def predict(self, X):
        """Return the linear prediction ``X.dot(w) + b`` for each row of X."""
        return X.dot(self.w) + self.b


In [3]:
# now importing the required library
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics  import accuracy_score
import matplotlib.pyplot as plt


In [4]:
# Read the salary CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer="/content/drive/MyDrive/salary_data.csv")

In [5]:
# Preview the first five rows
data.head(n=5)

Unnamed: 0,YearsExperience,Salary
0,1.1,39343
1,1.3,46205
2,1.5,37731
3,2.0,43525
4,2.2,39891


In [6]:
# Preview the last five rows
data.tail(n=5)

Unnamed: 0,YearsExperience,Salary
25,9.0,105582
26,9.5,116969
27,9.6,112635
28,10.3,122391
29,10.5,121872


In [7]:
# Shape of the data as (number of rows, number of columns)
data.shape

(30, 2)

In [8]:
# Count the missing values in each column
data.isna().sum()

YearsExperience    0
Salary             0
dtype: int64

In [9]:
# Separate the features from the target.
# x: every column except the last, kept 2-D because the model expects a matrix
# y: the last column, so the target can never overlap the feature columns
x = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

In [10]:
# Show the feature matrix (all columns of data except the last)
print(x)

[[ 1.1]
 [ 1.3]
 [ 1.5]
 [ 2. ]
 [ 2.2]
 [ 2.9]
 [ 3. ]
 [ 3.2]
 [ 3.2]
 [ 3.7]
 [ 3.9]
 [ 4. ]
 [ 4. ]
 [ 4.1]
 [ 4.5]
 [ 4.9]
 [ 5.1]
 [ 5.3]
 [ 5.9]
 [ 6. ]
 [ 6.8]
 [ 7.1]
 [ 7.9]
 [ 8.2]
 [ 8.7]
 [ 9. ]
 [ 9.5]
 [ 9.6]
 [10.3]
 [10.5]]


In [11]:
# Show the target values
print(y)

[ 39343  46205  37731  43525  39891  56642  60150  54445  64445  57189
  63218  55794  56957  57081  61111  67938  66029  83088  81363  93940
  91738  98273 101302 113812 109431 105582 116969 112635 122391 121872]


In [12]:
# Split the dataset into training and testing data (30% of the rows are held out for testing)
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.3, random_state=2
)

Training the Lasso model

In [13]:
# Hyperparameters for the from-scratch Lasso model
model = Lasso_Regression(
    learning_rate=0.02,
    no_of_iterations=1000,
    lambda_parameter=200,
)

In [14]:
# Run gradient descent on the training split
model.fit(X_train, Y_train)

Predicting Salary values for the test data


In [15]:
# Predict the target for the held-out test rows
test_data_prediction = model.predict(X_test)

In [16]:
# Show the predictions of the from-scratch model
print(test_data_prediction)

[ 36134.17658033  34227.3127045   66643.99859368  59016.54309034
  91433.22897953  80945.47766244 101920.98029662  52342.51952492
  42808.20014575]


In [17]:
from sklearn import metrics

In [19]:
# Evaluate the from-scratch model on the test split.
# score_1: R squared (coefficient of determination)
# score_2: mean absolute error
score_1 = metrics.r2_score(y_true=Y_test, y_pred=test_data_prediction)
score_2 = metrics.mean_absolute_error(y_true=Y_test, y_pred=test_data_prediction)

print("R squared error : ", score_1)
print('Mean Absolute Error : ', score_2)

R squared error :  0.9001465640148576
Mean Absolute Error :  6977.405087770438


In [20]:
from sklearn.linear_model import Lasso
# scikit-learn's Lasso built with its default hyperparameters: no penalty
# strength is passed here, so it is not matched to lambda_parameter=200 above.
sk_model = Lasso()

In [21]:
# Fit the scikit-learn model on the same training split
sk_model.fit(X_train, Y_train)

In [22]:
# Predict the target for the same held-out test rows
sk_test_data_prediction = sk_model.predict(X_test)

In [24]:
# Evaluate the scikit-learn model on the test split.
# score_1_: R squared (coefficient of determination)
# score_2_: mean absolute error
score_1_ = metrics.r2_score(y_true=Y_test, y_pred=sk_test_data_prediction)
score_2_ = metrics.mean_absolute_error(y_true=Y_test, y_pred=sk_test_data_prediction)

print("R squared error : ", score_1_)
print('Mean Absolute Error : ', score_2_)

R squared error :  0.9002161979504008
Mean Absolute Error :  6974.34146528967
