# Linear Regression Model

# Importing Dependencies

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split



# Creating Linear Regression Model

In [9]:
class Linear_Regression():
    """Multiple linear regression fitted with batch gradient descent.

    The model is ``y = X.dot(w) + b``. ``fit`` minimises the mean squared
    error by taking ``no_of_iterations`` full-batch gradient steps of size
    ``learning_rate``.

    Attributes set by ``fit``:
        m, n: number of training rows and number of feature columns.
        w: float weight vector of length ``n``.
        b: scalar bias (intercept).
        X, Y: the training data, stored as float arrays.
    """

    def __init__(self,learning_rate,no_of_iterations):
        """Store the hyper-parameters; no training happens here.

        Args:
            learning_rate: step size applied to every gradient update.
            no_of_iterations: number of gradient steps ``fit`` performs.
        """
        self.no_of_iterations = no_of_iterations
        self.learning_rate = learning_rate

    def fit(self,X,Y):
        """Train the weights and bias on ``X`` / ``Y``.

        Args:
            X: 2-D array-like of shape (rows, features).
            Y: targets, one per row. A column vector of shape (rows, 1) is
                flattened to (rows,).

        Raises:
            ValueError: if ``X`` is not 2-D, has no rows, or ``Y`` does not
                hold exactly one value per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D (rows, features); got %d dimension(s)" % X.ndim
            )
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one row")

        # Flatten Y: a (rows, 1) target would otherwise broadcast against the
        # (rows,) prediction into a (rows, rows) matrix and silently corrupt
        # the gradient.
        Y = np.asarray(Y, dtype=float).reshape(-1)
        if Y.shape[0] != X.shape[0]:
            raise ValueError(
                "Y must hold one target per row of X; got %d targets for %d rows"
                % (Y.shape[0], X.shape[0])
            )

        self.m, self.n = X.shape
        # Float zeros so the parameters have a stable dtype even before the
        # first update.
        self.w = np.zeros(self.n)
        self.b = 0.0
        self.X = X
        self.Y = Y
        for _ in range(self.no_of_iterations):
            self.update_weights()

    def update_weights(self):
        """Apply one gradient-descent step of the mean squared error."""
        residual = self.Y - self.predict(self.X)
        # d(MSE)/dw = -2/m * X^T (Y - Y_hat);  d(MSE)/db = -2/m * sum(Y - Y_hat)
        dw = -(2 * (self.X.T).dot(residual))/self.m
        db = -(2 * np.sum(residual))/self.m
        self.w = self.w - self.learning_rate*dw
        self.b = self.b - self.learning_rate*db

    def predict(self, X):
        """Return ``X.dot(w) + b``.

        Args:
            X: array whose last axis has one entry per feature; a 2-D array
                yields one prediction per row, a 1-D sample yields a scalar.
        """
        return X.dot(self.w) + self.b

# Creating DataFrame using a csv file

In [11]:
# Load the happiness table from the CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer="world happiness dataset.csv")

In [12]:
# Dump the whole frame; pandas abbreviates the middle rows and wraps wide
# column sets, as the output below shows.
print(data)

     Overall rank         Country or region  Score  GDP per capita  \
0               1                   Finland  7.769           1.340   
1               2                   Denmark  7.600           1.383   
2               3                    Norway  7.554           1.488   
3               4                   Iceland  7.494           1.380   
4               5               Netherlands  7.488           1.396   
..            ...                       ...    ...             ...   
151           152                    Rwanda  3.334           0.359   
152           153                  Tanzania  3.231           0.476   
153           154               Afghanistan  3.203           0.350   
154           155  Central African Republic  3.083           0.026   
155           156               South Sudan  2.853           0.306   

     Social support  Healthy life expectancy  Freedom to make life choices  \
0             1.587                    0.986                         0.596   
1  

In [16]:
# Preview the first five rows.
data.head(n=5)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [19]:
# Preview the last five rows.
data.tail(n=5)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
151,152,Rwanda,3.334,0.359,0.711,0.614,0.555,0.217,0.411
152,153,Tanzania,3.231,0.476,0.885,0.499,0.417,0.276,0.147
153,154,Afghanistan,3.203,0.35,0.517,0.361,0.0,0.158,0.025
154,155,Central African Republic,3.083,0.026,0.0,0.105,0.225,0.235,0.035
155,156,South Sudan,2.853,0.306,0.575,0.295,0.01,0.202,0.091


# Separating the features and the target

In [22]:
# Features: the columns from position 3 up to (not including) the last three.
# In the frame displayed earlier these are "GDP per capita", "Social support"
# and "Healthy life expectancy".
X = data.iloc[:, 3:-3].to_numpy()
# Target: the "Score" column.
Y = data.loc[:, "Score"].to_numpy()

In [24]:
# Inspect the feature matrix (one row per country, one column per feature).
print(X)

[[1.34  1.587 0.986]
 [1.383 1.573 0.996]
 [1.488 1.582 1.028]
 [1.38  1.624 1.026]
 [1.396 1.522 0.999]
 [1.452 1.526 1.052]
 [1.387 1.487 1.009]
 [1.303 1.557 1.026]
 [1.365 1.505 1.039]
 [1.376 1.475 1.016]
 [1.372 1.548 1.036]
 [1.034 1.441 0.963]
 [1.276 1.455 1.029]
 [1.609 1.479 1.012]
 [1.333 1.538 0.996]
 [1.499 1.553 0.999]
 [1.373 1.454 0.987]
 [1.356 1.504 0.986]
 [1.433 1.457 0.874]
 [1.269 1.487 0.92 ]
 [1.503 1.31  0.825]
 [1.3   1.52  0.999]
 [1.07  1.323 0.861]
 [1.324 1.472 1.045]
 [1.368 1.43  0.914]
 [1.159 1.369 0.92 ]
 [0.8   1.269 0.746]
 [1.403 1.357 0.795]
 [1.684 1.313 0.871]
 [1.286 1.484 1.062]
 [1.149 1.442 0.91 ]
 [1.004 1.439 0.802]
 [1.124 1.465 0.891]
 [1.572 1.463 1.141]
 [0.794 1.242 0.789]
 [1.294 1.488 1.039]
 [1.362 1.368 0.871]
 [1.246 1.504 0.881]
 [1.231 1.477 0.713]
 [1.206 1.438 0.884]
 [0.745 1.529 0.756]
 [1.238 1.515 0.818]
 [0.985 1.41  0.841]
 [1.258 1.523 0.953]
 [0.694 1.325 0.835]
 [0.882 1.232 0.758]
 [1.092 1.432 0.881]
 [1.162 1.232

In [26]:
# Inspect the target vector taken from the "Score" column.
print(Y)

[7.769 7.6   7.554 7.494 7.488 7.48  7.343 7.307 7.278 7.246 7.228 7.167
 7.139 7.09  7.054 7.021 6.985 6.923 6.892 6.852 6.825 6.726 6.595 6.592
 6.446 6.444 6.436 6.375 6.374 6.354 6.321 6.3   6.293 6.262 6.253 6.223
 6.199 6.198 6.192 6.182 6.174 6.149 6.125 6.118 6.105 6.1   6.086 6.07
 6.046 6.028 6.021 6.008 5.94  5.895 5.893 5.89  5.888 5.886 5.86  5.809
 5.779 5.758 5.743 5.718 5.697 5.693 5.653 5.648 5.631 5.603 5.529 5.525
 5.523 5.467 5.432 5.43  5.425 5.386 5.373 5.339 5.323 5.287 5.285 5.274
 5.265 5.261 5.247 5.211 5.208 5.208 5.197 5.192 5.191 5.175 5.082 5.044
 5.011 4.996 4.944 4.913 4.906 4.883 4.812 4.799 4.796 4.722 4.719 4.707
 4.7   4.696 4.681 4.668 4.639 4.628 4.587 4.559 4.548 4.534 4.519 4.516
 4.509 4.49  4.466 4.461 4.456 4.437 4.418 4.39  4.374 4.366 4.36  4.35
 4.332 4.286 4.212 4.189 4.166 4.107 4.085 4.015 3.975 3.973 3.933 3.802
 3.775 3.663 3.597 3.488 3.462 3.41  3.38  3.334 3.231 3.203 3.083 2.853]


# Splitting the dataset into training data and testing data

In [93]:
# Hold out 25% of the rows for testing; the fixed random_state makes the
# shuffle reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=45,
)

# Training the linear regression model

In [250]:
# Small step size paired with a large number of gradient-descent steps.
model = Linear_Regression(
    learning_rate=1e-5,
    no_of_iterations=100_000,
)

In [258]:
# Run gradient descent on the training split only.
model.fit(X=X_train, Y=Y_train)

In [259]:
import pickle


In [260]:
filename = "trained_model.sav"
# Serialise the fitted model to disk. The with-block guarantees the file is
# flushed and closed even if pickling fails, instead of leaking the handle.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [261]:
# Show the learned parameters, then the held-out feature rows.
# NOTE(review): model.w holds one weight per feature column, but only the
# first element is printed here — confirm whether all weights should be shown.
print("weight =", model.w[0])
print("bias = ", model.b)
print(X_test)

weight = 1.2302738780898916
bias =  1.4686549733480032
[[1.34  1.587 0.986]
 [0.745 1.529 0.756]
 [1.231 1.477 0.713]
 [1.38  1.624 1.026]
 [0.96  1.274 0.854]
 [0.694 1.325 0.835]
 [0.855 1.475 0.777]
 [1.043 1.147 0.769]
 [0.332 1.069 0.443]
 [1.004 1.383 0.854]
 [0.366 1.114 0.433]
 [1.372 1.548 1.036]
 [1.015 1.401 0.779]
 [0.921 1.    0.815]
 [0.323 0.688 0.449]
 [1.294 1.488 1.039]
 [0.578 1.058 0.426]
 [0.949 1.265 0.831]
 [0.619 0.378 0.44 ]
 [0.336 1.033 0.532]
 [0.85  1.055 0.815]
 [0.931 1.203 0.66 ]
 [0.306 0.575 0.295]
 [0.987 1.224 0.815]
 [1.043 0.98  0.574]
 [0.611 0.868 0.486]
 [1.044 1.303 0.673]
 [1.396 1.522 0.999]
 [0.71  1.181 0.555]
 [1.684 1.313 0.871]
 [0.813 1.321 0.604]
 [0.794 1.242 0.789]]


In [262]:
# Predict the score for every held-out row.
test_data_prediction = model.predict(X=X_test)

In [263]:
# Display the predicted scores for the test rows.
print(test_data_prediction)

[6.80928318 5.74404123 6.21004819 6.96196424 5.6774642  5.41704192
 5.80951162 5.47788774 4.13796519 5.91594474 4.2456784  6.83380875
 5.88324607 5.1262046  3.48865259 6.63943814 4.40462902 5.62519622
 3.31931882 4.17298767 5.13187504 5.32337453 3.11918594 5.58624729
 4.99609171 4.18522628 5.6448128  6.78153618 4.90693168 6.6515216
 5.32052182 5.35266679]


In [270]:
# Restore the model saved earlier. The with-block closes the file handle
# instead of leaking it.
# NOTE: unpickling can execute arbitrary code — only load files you created
# or otherwise trust.
with open("trained_model.sav", "rb") as model_file:
    loaded_model = pickle.load(model_file)

In [272]:
# One hand-written sample with three feature values.
# NOTE(review): presumably in the same column order as the training
# features — confirm.
arr = np.asarray([0.982, 0.79, 1.27], dtype=np.float64)

In [274]:
# Score the single sample with the model restored from disk.
y = loaded_model.predict(X=arr)

In [276]:
# Display the prediction for the single sample.
print(y)

5.311243428491612


In [278]:
from sklearn.metrics import r2_score

# Coefficient of determination of the test-set predictions against the
# true scores.
s = r2_score(y_true=Y_test, y_pred=test_data_prediction)
print(s)

0.824047834177107
