In [1]:
from abc import abstractmethod
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

%matplotlib inline

In [2]:
class BaseFunc:
    @abstractmethod
    def predict(self, data):
        pass

    @abstractmethod
    def derivative(self, data):
        pass

    def parameters(self):
        return self._parameters

    def update_parameters(self, new_parameters):
        self._parameters = new_parameters

In [3]:
class LossFunc:
    @abstractmethod
    def loss_value(self, prediction, ground_trouth):
        pass

    @abstractmethod
    def loss_derivative(self, prediction, ground_trouth):
        pass

In [4]:
class Model:
    def __init__(self, base_func: BaseFunc, loss_func: LossFunc, learning_rate) -> None:
        self.base_func = base_func
        self.loss_func = loss_func
        self.learning_rate = learning_rate

    def train(self, epochs_num, data, labels):
        for _ in range(epochs_num):
            prediction = self.base_func.predict(data)

            gradient = self.base_func.derivative(
                data
            ).T @ self.loss_func.loss_derivative(prediction, labels)

            new_parameters = self.base_func.parameters() - self.learning_rate * gradient

            print(new_parameters)

            self.base_func.update_parameters(new_parameters)

    def test(self, data, labels):
        prediction = base_func.predict(data)
        return loss_func.loss_value(prediction, labels)

# Concrete implementations

In [5]:
class OrdinaryBaseFunc(BaseFunc):
    def __init__(self, degree) -> None:
        self._parameters = np.zeros(degree)

    def predict(self, data):
        # temp = data @ self._parameters
        # print(temp.shape)
        return data @ self._parameters

    def derivative(self, data):
        return data

In [6]:
class SE(LossFunc):
    def loss_value(self, prediction, ground_trouth):
        return (1 / 2) * (ground_trouth - prediction).T @ (ground_trouth - prediction)

    def loss_derivative(self, prediction, ground_trouth):
        return prediction - ground_trouth

# Load and display data

In [7]:
df = pd.read_csv("dane.data", delimiter=r"\s+", header=None, decimal=",")

In [8]:
def divide_data(data, train_ratio):
    # return train_test_split(data, train_size=train_ratio,  random_state=42)
    return train_test_split(data, train_size=train_ratio, random_state=8)
    # return train_test_split(data, train_size=train_ratio, stratify=housing["median_house_value"], random_state=8)


train_set, test_set = divide_data(df, 0.75)

In [9]:
train_labels = train_set.pop(train_set.columns[7])
test_labels = test_set.pop(test_set.columns[7])
# train_set.insert(0, "Ones", 1)
train_set.head()

Unnamed: 0,0,1,2,3,4,5,6
1502,5,2,2,37,51,1,7
1781,7,-4,6,7,46,-2,19
989,0,-1,2,21,33,-3,13
539,-5,-2,5,31,58,-2,15
893,-1,-2,3,20,37,1,15


In [10]:
train_labels.head()

1502    2040.17
1781     673.05
989      949.79
539     1655.02
893     1000.37
Name: 7, dtype: float64

In [11]:
def preprocess_data(data_matrix):
    mean = np.mean(data_matrix, axis=0)
    std = np.std(data_matrix, axis=0)
    std[std == 0] = 1

    standarized = (data_matrix - mean) / std
    return np.c_[np.ones(standarized.shape[0]), standarized]

In [12]:
train_matrix = train_set.to_numpy()
train_labels_matrix = train_labels.to_numpy()

train_matrix = preprocess_data(train_matrix)

print(train_matrix[:6])
print(train_matrix.shape)
print(train_labels_matrix.shape)

[[ 1.          0.94645435  0.91001356 -1.50528864  1.56469799  0.75947842
   1.40393053 -0.91001356]
 [ 1.          1.29641366 -1.17578205  0.84883517 -0.93014205  0.17946102
  -0.68870308  1.17578205]
 [ 1.          0.07155606 -0.13288424 -1.50528864  0.23411663 -1.32858422
  -1.38624761  0.13288424]
 [ 1.         -0.80334223 -0.48051684  0.26030422  1.06572998  1.57150279
  -0.68870308  0.48051684]
 [ 1.         -0.1034236  -0.48051684 -0.91675768  0.1509553  -0.8645703
   1.40393053  0.48051684]
 [ 1.          1.64637298  0.21474836  0.84883517 -1.34594873  1.45549931
  -0.68870308 -0.21474836]]
(1499, 8)
(1499,)


# Ordinary model

In [13]:
base_func = OrdinaryBaseFunc(train_matrix.shape[1])
loss_func = SE()

model = Model(base_func, loss_func, 0.01)

In [14]:
model.train(40, train_matrix, train_labels_matrix)

print(model.test(train_matrix, train_labels_matrix))

[14459.3789      2216.82399728  -394.49900962   151.14793893
  6844.78765233  2252.55207138    88.39125017   394.49900962]
[-187827.331911    -31169.7430241    15502.50258104   -6583.16756089
  -89413.42438814  -29181.46220537    -396.68398623  -15502.50258104]
[2642163.75233489  467782.04367048 -510248.41736609  163363.62928993
 1279312.40433822  417021.27326789  -10638.26391106  510248.41736609]
[-36949411.51626512  -6926978.69181048  15689804.61914966
  -3603793.47645433 -18581709.44909183  -6086403.06395887
    524535.1296854  -15689804.61914967]
[ 5.16936726e+08  1.01478462e+08 -4.68491680e+08  7.81757755e+07
  2.80507726e+08  9.28564934e+07 -1.64172662e+07  4.68491680e+08]
[-7.23193034e+09 -1.46042445e+09  1.37990454e+10 -1.75258659e+09
 -4.52187686e+09 -1.51889236e+09  4.61435380e+08 -1.37990454e+10]
[ 1.01174720e+11  2.03302641e+10 -4.03774412e+11  4.15877246e+10
  8.05570292e+10  2.74655971e+10 -1.26523558e+10  4.03774412e+11]
[-1.41543432e+12 -2.63460205e+11  1.17768762e+13 -