In [1]:
#Importing necessary libraries
import numpy as np
import pandas as pd

In [2]:
class LinearRegression: #writing the Linear regression algorithm from scratch
    """Linear regression trained with full-batch gradient descent.

    The model predicts ``X @ weights + bias`` and each training step moves
    ``weights`` and ``bias`` against the gradient of the squared-error loss
    averaged over all samples.

    Parameters
    ----------
    lr : float, default 0.001
        Step size applied to every gradient update.
    n_iters : int, default 1000
        Number of gradient-descent updates performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` has been called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, lr = 0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y): #writing code for the fit function
        """Learn ``weights`` and ``bias`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features (NumPy array, DataFrame or nested list).
        y : array-like with n_samples values
            Training targets. A column vector of shape (n_samples, 1) is
            flattened so that it cannot broadcast against the predictions.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, contains no samples, or ``y`` does not hold
            exactly one value per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a (n, 1) target does not turn `y_pred - y` into an (n, n) matrix.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D with shape (n_samples, n_features); got ndim={X.ndim}"
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} values"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iters):
            # Prediction error of the current parameters, shared by both gradients.
            residual = np.dot(X, self.weights) + self.bias - y

            dw = (1/n_samples) * np.dot(X.T, residual)
            db = (1/n_samples) * np.sum(residual)

            self.weights = self.weights - self.lr * dw
            self.bias = self.bias - self.lr * db

    def predict(self, X): #writing code for the predict function
        """Return ``X @ weights + bias`` for the given samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict for.

        Returns
        -------
        numpy.ndarray
            One predicted value per row of ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("fit must be called before predict")
        y_pred = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return y_pred

In [32]:
# Load the salary dataset from the CSV file in the working directory.
DATASET_PATH = "Salary_dataset.csv"
df = pd.read_csv(DATASET_PATH)

In [33]:
df.head()  # preview the first rows to see which columns were loaded

Unnamed: 0.1,Unnamed: 0,YearsExperience,Salary
0,0,1.2,39344.0
1,1,1.4,46206.0
2,2,1.6,37732.0
3,3,2.1,43526.0
4,4,2.3,39892.0


In [35]:
# The "Unnamed: 0" column is not used as a feature, so it is removed here.
# Rebinding `df` instead of using inplace=True, together with errors="ignore",
# keeps this cell safe to re-run: a second run is a no-op rather than a KeyError.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

In [36]:
df.head()  # preview again to confirm the column was removed

Unnamed: 0,YearsExperience,Salary
0,1.2,39344.0
1,1.4,46206.0
2,1.6,37732.0
3,2.1,43526.0
4,2.3,39892.0


In [38]:
from sklearn.model_selection import train_test_split as tst  # helper that splits rows into train and test sets

# Every column except Salary is a feature; Salary is the regression target.
features = df.drop(columns=["Salary"])
target = df["Salary"]

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = tst(
    features,
    target,
    test_size=0.2,
    random_state=10,
)

In [39]:
# Instantiate the from-scratch regressor with its default settings and train it on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [47]:
y_test  # inspect the held-out target values used for scoring

20    91739.0
7     54446.0
5     56643.0
2     37732.0
3     43526.0
21    98274.0
Name: Salary, dtype: float64

In [54]:
from sklearn.metrics import explained_variance_score  # regression metric: share of target variance explained by the predictions

# Predict on the held-out rows with the first model and report its score.
y_pred = model.predict(X_test)
scratch_score = explained_variance_score(y_true=y_test, y_pred=y_pred)
print(scratch_score)

0.983476763509285


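The scratch model reaches an explained variance score of about 0.9835 (98.35%). Note that explained variance is a regression metric, not a classification accuracy.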

In [49]:
# Import scikit-learn's estimator under an alias so that it does not replace the
# from-scratch LinearRegression class defined earlier in this notebook; without
# the alias, re-running the earlier model cell would silently build a
# scikit-learn model instead.
from sklearn.linear_model import LinearRegression as SklearnLinearRegression
model1 = SklearnLinearRegression()

In [50]:
model1.fit(X_train,y_train)  # train the scikit-learn model on the same training split

In [55]:
# Predict on the same held-out rows with the scikit-learn model and report its score.
y_pred1 = model1.predict(X_test)
sklearn_score = explained_variance_score(y_true=y_test, y_pred=y_pred1)
print(sklearn_score)

0.983476763509285


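The scikit-learn model also reaches an explained variance score of about 0.9835 (98.35%).

Note: the two printed scores are identical to every digit, which is unusual when a gradient-descent fit is compared with an exact least-squares solution. The execution counts show that the cells were run out of order, so re-run the notebook from a fresh kernel to confirm that each score comes from the intended model.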

#### Accuracy and performance
- Both models reach essentially the same score on this dataset, as shown above. On larger datasets the from-scratch model may be harder to optimize and slower to train, so the library version is likely the better option, although I do not expect the difference in accuracy to be large.
#### Training time
- For small datasets the training time of the two models is very similar, but for larger datasets the from-scratch model may take more time.
#### Code Complexity
- The from-scratch version takes more time to write and is more complex. Since we cannot spend time rewriting that code for every project, I prefer the library version, which is much simpler to use.
#### Scalability
- As mentioned above, the from-scratch version may have difficulty handling large datasets, whereas the library version should not.