In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
data = {
        'year': [2018, 2015, 2020, 2012],
        'km': [50000, 80000, 20000, 120000],
        'engine_size': [1.6, 2.0, 1.8, 1.4],
        'brand': ['Toyota', 'Honda', 'Volkswagen', 'Ford'],
        'price': [120000, 90000, 150000, 60000]
}

In [3]:
def normalize(data):
    data_numpy = np.asarray(data)
    std_dist = (data - data_numpy.mean()) / data_numpy.std()
    return np.asarray(std_dist, dtype=np.float32)

In [4]:
le = LabelEncoder()

In [5]:
data['brand'] = le.fit_transform(data['brand']).astype(np.float32)
data['brand']

array([2., 1., 3., 0.], dtype=float32)

In [6]:
data['year'] = normalize(data['year'])
data['year']

array([ 0.57735026, -0.41239306,  1.2371792 , -1.4021363 ], dtype=float32)

In [7]:
data['km'] = normalize(data['km'])
data['km']

array([-0.47301617,  0.3378687 , -1.283901  ,  1.4190485 ], dtype=float32)

In [8]:
data['engine_size'] = normalize(data['engine_size'])
data['engine_size']

array([-0.4472136,  1.3416408,  0.4472136, -1.3416408], dtype=float32)

In [9]:
df = pd.DataFrame(data)
df

Unnamed: 0,year,km,engine_size,brand,price
0,0.57735,-0.473016,-0.447214,2.0,120000
1,-0.412393,0.337869,1.341641,1.0,90000
2,1.237179,-1.283901,0.447214,3.0,150000
3,-1.402136,1.419049,-1.341641,0.0,60000


In [10]:
X = df.drop(['price'], axis=1)
X

Unnamed: 0,year,km,engine_size,brand
0,0.57735,-0.473016,-0.447214,2.0
1,-0.412393,0.337869,1.341641,1.0
2,1.237179,-1.283901,0.447214,3.0
3,-1.402136,1.419049,-1.341641,0.0


In [11]:
y = df['price']
y

0    120000
1     90000
2    150000
3     60000
Name: price, dtype: int64

In [12]:
X_train, X_test, y_train, y_test = X.iloc[:3], X.iloc[3:], y.iloc[:3], y.iloc[3:]

In [13]:
X_train = np.asarray(X_train).astype(np.float32)
X_train

array([[ 0.57735026, -0.47301617, -0.4472136 ,  2.        ],
       [-0.41239306,  0.3378687 ,  1.3416408 ,  1.        ],
       [ 1.2371792 , -1.283901  ,  0.4472136 ,  3.        ]],
      dtype=float32)

In [14]:
X_test = np.asarray(X_test).astype(np.float32)
X_test

array([[-1.4021363,  1.4190485, -1.3416408,  0.       ]], dtype=float32)

In [15]:
y_train = np.asarray(y_train).astype(np.float32)
y_train

array([120000.,  90000., 150000.], dtype=float32)

In [16]:
y_test = np.asarray(y_test).astype(np.float32)
y_test

array([60000.], dtype=float32)

In [17]:
W = np.random.randn(X_train.shape[1])
b = 0.

In [18]:
W

array([-1.10546063,  0.07530671, -0.78121566,  1.19797432])

In [19]:
b

0.0

In [20]:
def cost(X, y, W, b): 
    m = X.shape[0]
    cost = 0.
    for i in range(m):
        prediction = np.dot(X[i], W) + b
        cost += (prediction - y[i]) ** 2
    return cost / (2*m)

In [21]:
cost(X_train, y_train, W, b)

7499809196.503006

In [22]:
cost_of_test_before_training = cost(X_test, y_test, W, b)
cost_of_test_before_training

1799837704.7861533

In [23]:
def gradient(X, y, W, b): 
    m, n = X.shape
    derivative_w = np.zeros((n,))
    derivative_b = 0
    for i in range(m):
        predictions = (np.dot(X[i], W) + b) - y[i]
        for j in range(n):
            derivative_w[j] += predictions * X[i, j]    
        derivative_b += predictions
    return derivative_w / m, derivative_b / m

In [24]:
gradient(X_train, y_train, W, b)

(array([ -72580.13042587,   72978.61760294,  -44721.1219416 ,
        -259996.62841624]),
 -119998.50571176993)

In [25]:
def gradient_descent(X, y, W, b, cost_function, gradient_function, alpha, num_iters): 
    m, n = X.shape
    for i in range(num_iters):
        derivative_w, derivative_b = gradient_function(X, y, W, b)  
        W = W - alpha * derivative_w               
        b = b - alpha * derivative_b               
        cost =  np.sum(cost_function(X, y, W, b))
        if i % (num_iters / 10) == 0:
            print(f"Iteration {i:4}: Cost {float(cost):8.2f}   ")
    print(f"Iteration {num_iters:4}: Cost {float(cost):8.2f}   ")
    return W, b

In [26]:
alpha = 0.01
iterations = 100

In [27]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost 6584746351.77   
Iteration   10: Cost 1887562172.73   
Iteration   20: Cost 652523781.07   
Iteration   30: Cost 314425845.63   
Iteration   40: Cost 210860451.77   
Iteration   50: Cost 170391759.60   
Iteration   60: Cost 148337205.52   
Iteration   70: Cost 132794698.65   
Iteration   80: Cost 120386172.87   
Iteration   90: Cost 109988007.52   
Iteration  100: Cost 101933242.05   


In [28]:
alpha = 0.015

In [29]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost 100691090.35   
Iteration   10: Cost 89637240.18   
Iteration   20: Cost 80630035.07   
Iteration   30: Cost 73178385.51   
Iteration   40: Cost 66919894.88   
Iteration   50: Cost 61584697.70   
Iteration   60: Cost 56971103.61   
Iteration   70: Cost 52927890.22   
Iteration   80: Cost 49341247.68   
Iteration   90: Cost 46125128.36   
Iteration  100: Cost 43493009.47   


In [30]:
alpha = 0.02

In [31]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost 43121427.09   
Iteration   10: Cost 39637709.11   
Iteration   20: Cost 36519151.36   
Iteration   30: Cost 33702299.58   
Iteration   40: Cost 31140562.65   
Iteration   50: Cost 28798915.04   
Iteration   60: Cost 26650338.10   
Iteration   70: Cost 24673423.66   
Iteration   80: Cost 22850755.06   
Iteration   90: Cost 21167808.41   
Iteration  100: Cost 19762350.25   


In [32]:
alpha = 0.025

In [33]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost 19574757.05   
Iteration   10: Cost 17795703.47   
Iteration   20: Cost 16180435.49   
Iteration   30: Cost 14713044.57   
Iteration   40: Cost 13379491.73   
Iteration   50: Cost 12167267.68   
Iteration   60: Cost 11065151.52   
Iteration   70: Cost 10063032.17   
Iteration   80: Cost 9151770.55   
Iteration   90: Cost 8323089.08   
Iteration  100: Cost 7641662.13   


In [35]:
alpha = 0.03

In [36]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost 7555084.88   
Iteration   10: Cost 6741483.06   
Iteration   20: Cost 6015514.36   
Iteration   30: Cost 5367732.10   
Iteration   40: Cost 4789711.45   
Iteration   50: Cost 4273937.28   
Iteration   60: Cost 3813705.09   
Iteration   70: Cost 3403033.09   
Iteration   80: Cost 3036584.00   
Iteration   90: Cost 2709595.51   
Iteration  100: Cost 2445522.89   


In [37]:
alpha = 0.035

In [38]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost 2413216.16   
Iteration   10: Cost 2112716.16   
Iteration   20: Cost 1849635.26   
Iteration   30: Cost 1619313.90   
Iteration   40: Cost 1417672.75   
Iteration   50: Cost 1241140.49   
Iteration   60: Cost 1086590.49   
Iteration   70: Cost 951285.44   
Iteration   80: Cost 832828.94   
Iteration   90: Cost 729122.94   
Iteration  100: Cost 646876.27   


In [39]:
alpha = 0.04

In [40]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost 637114.51   
Iteration   10: Cost 547243.10   
Iteration   20: Cost 470048.95   
Iteration   30: Cost 403743.82   
Iteration   40: Cost 346791.69   
Iteration   50: Cost 297873.23   
Iteration   60: Cost 255855.21   
Iteration   70: Cost 219764.26   
Iteration   80: Cost 188764.30   
Iteration   90: Cost 162137.19   
Iteration  100: Cost 141399.92   


In [41]:
alpha = 0.045

In [42]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost 139000.52   
Iteration   10: Cost 117135.67   
Iteration   20: Cost 98710.17   
Iteration   30: Cost 83183.01   
Iteration   40: Cost 70098.28   
Iteration   50: Cost 59071.78   
Iteration   60: Cost 49779.76   
Iteration   70: Cost 41949.38   
Iteration   80: Cost 35350.72   
Iteration   90: Cost 29790.03   
Iteration  100: Cost 25537.39   


In [43]:
alpha = 0.05

In [44]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost 25056.13   
Iteration   10: Cost 20715.18   
Iteration   20: Cost 17126.30   
Iteration   30: Cost 14159.18   
Iteration   40: Cost 11706.12   
Iteration   50: Cost  9678.05   
Iteration   60: Cost  8001.33   
Iteration   70: Cost  6615.11   
Iteration   80: Cost  5469.05   
Iteration   90: Cost  4521.54   
Iteration  100: Cost  3809.99   


In [45]:
alpha = 0.055

In [46]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost  3731.05   
Iteration   10: Cost  3026.21   
Iteration   20: Cost  2454.53   
Iteration   30: Cost  1990.84   
Iteration   40: Cost  1614.75   
Iteration   50: Cost  1309.71   
Iteration   60: Cost  1062.29   
Iteration   70: Cost   861.61   
Iteration   80: Cost   698.85   
Iteration   90: Cost   566.83   
Iteration  100: Cost   469.47   


In [47]:
alpha = 0.06

In [48]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost   458.87   
Iteration   10: Cost   365.13   
Iteration   20: Cost   290.53   
Iteration   30: Cost   231.18   
Iteration   40: Cost   183.95   
Iteration   50: Cost   146.37   
Iteration   60: Cost   116.47   
Iteration   70: Cost    92.68   
Iteration   80: Cost    73.74   
Iteration   90: Cost    58.68   
Iteration  100: Cost    47.77   


In [49]:
W

array([-7085.32548792, 10698.85876424,  -857.90251836, 44141.33456451])

In [50]:
b

40471.12891001622

In [51]:
cost(X_train, y_train, W, b)

47.77071772505596

In [52]:
cost_of_test_after_training = cost(X_test, y_test, W, b)
cost_of_test_after_training

22706509.22454511

In [53]:
print("Cost before training: ", cost_of_test_before_training)
print("Cost after training: ", cost_of_test_after_training)
print("Diff: ", cost_of_test_before_training - cost_of_test_after_training)

Cost before training:  1799837704.7861533
Cost after training:  22706509.22454511
Diff:  1777131195.5616083


In [54]:
def predict(X, W, b):
    return (np.dot(X, W) + b).astype(np.float32)

In [55]:
y_pred = predict(X_test, W, b)
y_pred

array([66738.92], dtype=float32)

In [56]:
y_test

array([60000.], dtype=float32)

In [69]:
def accuracy(y, y_pred):
    diff = np.absolute(y_pred - y)
    return 100 - np.absolute(diff / y) * 100

In [70]:
acc = accuracy(y_pred, y_test)
acc

array([89.90256], dtype=float32)

In [71]:
print("Model accuracy is: ", acc[0])

Model accuracy is:  89.90256
