## Sibi Learning Deep Learning and ML 

In [18]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


#### Continous variable generation randomly

In [30]:
#set seed for reproducibility
np.random.seed(0)
n_samples = 100

#generate random predictor variables
X = np.random.uniform(low = -10, high = 10, size = n_samples)

#generate noise
noise = np.random.normal(loc=0.0, scale=1.0, size=n_samples)

#generate dependent variable
y = 2*X + 3 + noise

#convert to pandas dataframe
data = pd.DataFrame({'X': X, 'y': y})

print(data.head())


          X          y
0  0.976270   3.787390
1  4.303787  12.508401
2  2.055268   7.576197
3  0.897664   3.259084
4 -1.526904   1.434444
(100,)
[ 0.97627008  4.30378733  2.05526752  0.89766366 -1.52690401  2.91788226
 -1.24825577  7.83546002  9.27325521 -2.33116962  5.83450076  0.5778984
  1.36089122  8.51193277 -8.57927884 -8.25741401 -9.59563205  6.65239691
  5.56313502  7.40024296  9.57236684  5.98317128 -0.77041275  5.61058353
 -7.63451148  2.79842043 -7.13293425  8.89337834  0.43696644 -1.7067612
 -4.70888776  5.48467379 -0.87699336  1.36867898 -9.62420399  2.35270994
  2.24191445  2.33867994  8.87496157  3.63640598 -2.80984199 -1.25936092
  3.95262392 -8.79549057  3.33533431  3.41275739 -5.79234878 -7.42147405
 -3.69143298 -2.72578458  1.40393541 -1.22796973  9.76747676 -7.95910379
 -5.82246488 -6.77380964  3.06216651 -4.93416795 -0.67378454 -5.11148816
 -6.82060833 -7.79249718  3.12659179 -7.23634097 -6.06835277 -2.62549659
  6.4198646  -8.05797448  6.75889815 -8.07803184  9.52

#### Categorical variable generation randomly

In [9]:
#generating a categorical variable independent variable
x_1 = pd.Categorical(np.random.choice(['Low', 'Medium', 'High'], n_samples))

#generating dependent variable y
y_dict = {'Low': 1, 'Medium': 2, 'High': 3}

y_1 = pd.Categorical([y_dict[x] for x in x_1])

#convert to pandas dataframe
data_1 = pd.DataFrame({'X': x_1, 'y': y_1})

print(data_1.head())

        X  y
0  Medium  2
1     Low  1
2    High  3
3     Low  1
4    High  3


#### Categorical variables cannot be used in linear regression so have to convert to continous using one hot encoding

In [13]:
#one hot encoding of independent variable x
X_encoded = pd.get_dummies(data_1['X'], prefix='X', drop_first=False)

#convert the dependent variable to integer
y_encoded = data_1['y'].astype('int')

data_encoded = pd.concat([X_encoded, y_encoded], axis=1)

print(data_encoded.head())

   X_High  X_Low  X_Medium  y
0   False  False      True  2
1   False   True     False  1
2    True  False     False  3
3   False   True     False  1
4    True  False     False  3


#### Now data is ready we have to do test train split

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, random_state=0)

In [15]:
model_categorical = LinearRegression().fit(X_train, y_train)

In [16]:
y_pred = model_categorical.predict(X_test)

In [20]:
mae = mean_squared_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
print('R2 Score:', r2)

Mean Absolute Error: 2.7215701230124905e-31
Mean Squared Error: 2.7215701230124905e-31
R2 Score: 1.0


#### Now lets try to do for continous variable

In [24]:
# test train split
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(X, y, random_state=0)

mode_continuous = LinearRegression().fit(X_train_1.reshape(-1, 1), y_train_1)

In [25]:
y_pred_1 = mode_continuous.predict(X_test_1.reshape(-1, 1))

In [26]:
mae_1 = mean_squared_error(y_test_1, y_pred_1)
mse_1 = mean_squared_error(y_test_1, y_pred_1)
r2_1 = r2_score(y_test_1, y_pred_1)

print('Mean Absolute Error:', mae_1)
print('Mean Squared Error:', mse_1)
print('R2 Score:', r2_1)

Mean Absolute Error: 1.0934268926184856
Mean Squared Error: 1.0934268926184856
R2 Score: 0.9917428854927943


#### Creating Linear regression from scratch

In [31]:
class LinearRegression:
    def __init__(self, learning_rate=0.001, n_iters=1000):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
    
    def fit(self, X, y):
        #init parameters
        n_samples, n_features = X.shape

        self.weights = np.zeros(n_features)
        self.bias = 0

        #gradient descent
        for _ in range(self.n_iters):
            y_pred = np.dot(X, self.weights) + self.bias

            dw = (1/n_samples) * np.dot(X.T, (y_pred - y))
            db = (1/n_samples) * np.sum(y_pred - y)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        y_pred = np.dot(X, self.weights) + self.bias
        return y_pred