In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [94]:
columns = ["Id", "Latitutde", "Longitude", "target"]
data = pd.read_csv('3D_spatial_network.txt', sep=',', names=columns)
data.drop(columns= ["Id"], index = range(10000,434874) ,inplace = True)


In [97]:
data.c

Unnamed: 0,Latitutde,Longitude,target
0,-0.469079,-1.093255,17.052772
1,-0.468515,-1.093958,17.614840
2,-0.467917,-1.094435,18.083536
3,-0.467490,-1.094649,18.279465
4,-0.467078,-1.094643,18.422974
...,...,...,...
9995,1.265857,1.503518,45.790861
9996,1.265069,1.503432,45.475026
9997,1.264078,1.503180,46.834495
9998,1.263290,1.503108,48.185708


In [95]:
train_X = np.asarray(data.iloc[:-2000, :-1])
y_train = np.asarray(data.target[:-2000])
test_X = np.asarray(data.iloc[-2000:,:-1])
y_test = np.asarray(data.target[-2000:])

In [96]:
### Normalise

for i in range(train_X.shape[1]):
    train_X[:, i] = (train_X[:, i] - train_X[:, i].mean())/ train_X[:,i].std()
    test_X[:, i] = (test_X[:, i] - test_X[:, i].mean())/ test_X[:,i].std()

y_train = (y_train - y_train.mean())/ y_train.std()
y_test = (y_test - y_test.mean()) / y_test.std()

In [6]:
def modify_train_dataset(train, degree):
    if degree == 1:
        return train
    
    train_prev = np.copy(train)
    out = np.copy(train_prev)
    
    for i in range(2, degree + 1):
        train_prev = multiply_dataset(train_prev, train, i)
        out = np.concatenate((out, train_prev), axis = 1)
    return out 

In [7]:
def multiply_dataset(train_prev, train, degree):
#     print(train_prev)
#     print(train)
    train_degree = np.copy(train_prev)
    for i in range(train.shape[1]):
        train_degree[:,i] = train_degree[:, i] * train[:, 0]
    train_degree = np.insert(train_degree, train_degree.shape[1], train[:,-1] ** degree, axis=1)
    return train_degree

In [8]:
degree = 3
train_X_degree = modify_train_dataset(train_X, degree)

In [86]:
print("Matrix Method")
X_train = np.insert(train_X, 0, 1, axis=1)
X_test = np.insert(test_X, 0, 1, axis=1)

XTX_inv = np.linalg.inv(np.matmul(X_train.T, X_train))
moore_penrose_pseudo_inv = np.matmul(XTX_inv, X_train.T)
weights = np.matmul(moore_penrose_pseudo_inv, y_train)

print('intercept : ', weights[0])
print('weights : ', weights[1:])

y_pred = np.matmul(X_test, weights)
error = y_pred - y_test
loss_min = np.matmul(error.T, error)
print('\nloss on test set : ', loss_min)
print('RMSE loss: ', np.sqrt(loss_min/X_test.shape[0]))


y_pred = np.matmul(X_train, weights)
error = y_pred - y_train
loss_min = np.matmul(error.T, error)
print('\nloss on train set : ', loss_min)
print('RMSE loss: ', np.sqrt(loss_min/X_train.shape[0]))


Matrix Method
intercept :  -1.343543332144037e-15
weights :  [ 0.16006403 -0.09090649]

loss on test set :  1818.2618605279426
RMSE loss:  0.953483576294826

loss on train set :  7814.131056322699
RMSE loss:  0.9883149204784563


In [73]:
W_ = np.random.random(X_train.shape[1])

In [74]:
# W_ = np.copy(weights)
# W_ = [ 0.001, 0.200, -0.20]

In [87]:
y_pred = np.matmul(X_test, W_)
error = y_pred - y_test
loss_min = np.matmul(error.T, error)
print("Before gradient descent, Random initialisation of W")
print('\nloss on test set : ', loss_min)
print('RMSE loss: ', np.sqrt(loss_min/X_test.shape[0]))

y_pred = np.matmul(X_train, W_)
error = y_pred - y_train
loss_min = np.matmul(error.T, error)
print('\nloss on train set : ', loss_min)
print('RMSE loss: ', np.sqrt(loss_min/X_train.shape[0]))

Before gradient descent, Random initialisation of W

loss on test set :  3700.112829282558
RMSE loss:  1.3601677891500294

loss on train set :  15703.090152041936
RMSE loss:  1.401030431148889


In [88]:
learning_rate = 1e-9
X = np.insert(train_X,0,1,axis=1)

W1 = np.copy(W_)
# print(W)
loss = []

run=0

while True:
    run += 1
    y_pred = np.matmul(X, W1)
    error = y_pred - y_train
    loss_val = 1/2*np.matmul(error.T, error)
    loss.append(loss_val)
    
    grad = np.matmul(error.T, X)
    
    W1 = W1 - learning_rate*grad
    if(run==1):
        continue
    if(loss[run-2]-loss[run-1]< 0.01):
        break
    
    print(run, end = "\r")
print(W1)

[ 0.39348349  0.1419395  -0.04669549]


In [89]:
run

115097

In [90]:
print("From Gradient descent")

y_pred = np.matmul(X_test, W1)
error = y_pred - y_test
loss_min = np.matmul(error.T, error)
print('\nloss on test set : ', loss_min)
print('RMSE loss: ', np.sqrt(loss_min/X_test.shape[0]))

y_pred = np.matmul(X_train, W1)
error = y_pred - y_train
loss_min = np.matmul(error.T, error)
print('\nloss on train set : ', loss_min)
print('RMSE loss: ', np.sqrt(loss_min/X_train.shape[0]))

print("Intercept: ", W1[0])
print(W1[1:])

From Gradient descent

loss on test set :  2112.865089930333
RMSE loss:  1.0278290446203426

loss on train set :  9066.33776021314
RMSE loss:  1.0645619850561274
Intercept:  0.3934834947553149
[ 0.1419395  -0.04669549]


In [98]:
### stochastic
W = np.copy(W_)
loss = []
run = 0
learning_rate = 1e-4
while True:
    run += 1
    error = 0
#     print(run, end = "\r")
    for i in range(X_train.shape[0]):
        y_pred = np.dot(X_train[i], W)
        loss_val = y_pred - y_train[i]
        grad = loss_val * X_train[i]
        error += loss_val**2
        W = W - learning_rate * grad
    
    loss.append(error)
    
    if run == 1:
        continue
    
    if(loss[run-2] - loss[run-1] < 0.01):
        break
    print(error, end = "\r")
print(W)

[ 0.01823482  0.12028969 -0.1068097 ]


In [93]:
print("From stochastic Gradient Descent")

y_pred = np.matmul(X_test, W)
error = y_pred - y_test
loss_min = 1/2*np.matmul(error.T, error)
print('\nloss on test set : ', loss_min)
print('RMSE loss: ', np.sqrt(loss_min/X_test.shape[0]))

y_pred = np.matmul(X_train, W)
error = y_pred - y_train
loss_min = 1/2*np.matmul(error.T, error)
print('\nloss on train set : ', loss_min)
print('RMSE loss: ', np.sqrt(loss_min/X_train.shape[0]))

From stochastic Gradient Descent

loss on test set :  951.0400582773357
RMSE loss:  0.6895796031921679

loss on train set :  3917.5872144074524
RMSE loss:  0.6997845395555203


In [None]:
pred_y_no_reg = np.matmul(X_train, W)
error = y_pred - train_y
loss_min = 1/2*np.matmul(error.T, error)
print('\nloss on train set : ', loss_min)
print('RMSE loss: ', np.sqrt(loss_min/8000))

In [None]:
X_train.shape

In [None]:
train_y.shape

In [None]:
X = np.insert(train_X,0,1,axis=1)

train_X_reg = X[:-1500]
train_y_reg = train_y[:-1500]

val_X = X[-1500:]
val_y = train_y[-1500:]

learning_rate = 1e-9
lmd_2 = [1]

W = W_
print(W)

loss_valid = []
for i,lmd in enumerate(lmd_2):
    W = W_
    loss = [] 
    run = 0
    while True:
        run += 1
        pred_y = np.matmul(X,W)
        error = pred_y - train_y
        loss_val = 1/2*np.matmul(error.T,error) + lmd/2*np.matmul(W.T,W)
        loss.append(loss_val)
        grad = np.matmul(error.T,X) + lmd*W
        W = W - learning_rate*grad
        if(run==1):
            continue
        # previous loss (run - 1 - 1) minus current loss (run - 1)
        if(loss[run-2]-loss[run-1]<0.01):
            break
    
    print('\nintercept : ',W[0])
    print('weights : ',W[1:])
    print("lamda : {} , runs : {} , loss on training : {}".format(lmd,run,loss_val))

    pred_y = np.matmul(val_X,W)
    error = pred_y - val_y
    loss_valid.append(1/2*np.matmul(error.T,error))
    print('loss on val set : ',loss_valid[i])
    print("~~~~~~~~~~~~~~~~~~~")
    

plt.plot(lmd_2,loss_valid)
plt.show()

In [None]:
X = np.insert(train_X,0,1,axis=1)

train_X_reg = X[:-1500]
train_y_reg = train_y[:-1500]

val_X = X[-1500:]
val_y = train_y[-1500:]

learning_rate = 1e-10
lmd_1 = [1e-5,1e-4,1e-3,1e-2,1e-1,1,10,100,1000,10000,100000]

W = W_
print(W)

loss_valid = []

for i,lmd in enumerate(lmd_1):
    W = W_
    loss = [] 
    run = 0
    while True:
        run += 1
        pred_y = np.matmul(X,W)
        error = pred_y - train_y
        loss_val = 1/2*np.matmul(error.T,error) + lmd/2*np.sum(abs(W))
        loss.append(loss_val)
        grad = np.matmul(error.T,X) + lmd/2*np.sign(W)
        W = W - learning_rate*grad
        if(run==1):
            continue
        
        if(loss[run-2]-loss[run-1] < 0.01): 
            break
    
    print('\nintercept : ',W[0])
    print('weights : ',W[1:])
    print("lamda : {} , runs : {} , loss on training : {}".format(lmd,run,loss_val))

    pred_y = np.matmul(val_X,W)
    error = pred_y - val_y
    loss_valid.append(1/2*np.matmul(error.T,error))
    print('loss on val set : ',loss_valid[i])
    print("~~~~~~~~~~~~~~~~~~~")
    