In [1]:
import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt
import os
import matplotlib.colors as mcol
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from tqdm.notebook import tqdm_notebook as pbar
from sklearn import linear_model
from tqdm.notebook import tqdm_notebook as pbar

In [2]:
# io train
X = []
Y = []
with open("synthetic_dataset/train.txt") as f:
    reader = csv.reader(f)
    for row in reader:
        X.append([float(row[0]), float(row[1])])
        Y.append([int(row[2])-1])
                
# PredictorScaler=StandardScaler()
# X = PredictorScaler.fit_transform(X)
X = np.array(X, dtype=np.float64)
Y = np.array(Y, dtype=np.int16)
df_X = pd.DataFrame(X)

In [3]:
# io dev
X_dev = []
Y_dev = []
with open("synthetic_dataset/dev.txt") as f:
    reader = csv.reader(f)
    for row in reader:
        X_dev.append([float(row[0]), float(row[1])])
        Y_dev.append([int(row[2])-1])
        
# X_dev = PredictorScaler.transform(X_dev)
X_dev = np.array(X_dev, dtype=np.float64)
Y_dev = np.array(Y_dev, dtype=np.int16)

In [4]:
#creating instance of one-hot-encoder
encoder = OneHotEncoder()

# create df_Y
df_Y = pd.DataFrame(np.array(Y,dtype=int))

#perform one-hot encoding on  Y
encoder_df = pd.DataFrame(encoder.fit_transform(Y.reshape(-1,1)).toarray())
Yhat = encoder_df.to_numpy()

In [5]:
# prepend 1 to x to get z
Z = []
for x in X:
    z = [x1 for x1 in x]
    z.insert(0,1)
    Z.append(z)
Z = np.array(Z)
Z

array([[  1.    , -13.826 ,   4.799 ],
       [  1.    , -12.301 ,  -1.3551],
       [  1.    , -13.968 ,   4.3138],
       ...,
       [  1.    ,  -9.4373,   0.1722],
       [  1.    ,  -5.1853,   1.0373],
       [  1.    ,  -8.9136,  -4.6225]])

In [6]:
from scipy.special import expit
def dot_sigmoid(x:np.ndarray,y:np.ndarray):
    try:
        z = np.dot(x,y)
    except:
        print(x)
        print(y)
        x = np.array(x).ravel()
        z = np.dot(x,y)
    return expit(z)

In [7]:
def neg_log_likelihood(y: float, y_pred: float) -> float:
    return -((y * np.log(y_pred)))

def error(ys: np.ndarray, ys_pred: np.ndarray) -> float:
    assert len(ys) == len(ys_pred)
    num_items: int = len(ys)
    sum_nll: float = np.sum([neg_log_likelihood(y, y_pred) for y, y_pred in zip(ys, ys_pred)])
    return (1 / num_items) * sum_nll

In [8]:
beta = np.zeros(Z.shape[1])

print(f'Starting with "beta": {beta}')

epochs: int = 10000
learning_rate: float = 0.0001

for epoch in pbar(range(epochs)):
    # Calculate the "predictions" (squishified dot product of `beta` and `x`) based on our current `beta` vector
    ys_pred = np.array([dot_sigmoid(beta, x) for x in Z])
    # Calculate and print the error
    if epoch % 10 == True:
        loss: float = error(Y, ys_pred)
        print(f'Epoch {epoch} --> loss: {loss}')
        if(loss < 0.08):
            break
        

    # Calculate the gradient
    grad = [0. for _ in range(len(beta))]
    for x, y in zip(Z, Y):
        err: float = dot_sigmoid(beta, x) - y
        for i, x_i in enumerate(x):
            grad[i] += (err * x_i)
    grad = [1 / len(x) * g_i for g_i in grad]

    # Take a small step in the direction of greatest decrease
    beta = np.array([b + (gb * -learning_rate) for b, gb in zip(beta, grad)]).ravel()
    # print(f'Epoch {epoch} beta: {beta}')

print(f'Best estimate for "beta": {beta}')


Starting with "beta": [0. 0. 0.]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch 1 --> loss: 0.5653701772222722
Epoch 11 --> loss: 0.4207354355277279
Epoch 21 --> loss: 0.40824765119948514
Epoch 31 --> loss: 0.3978993301593847
Epoch 41 --> loss: 0.38807847924203126
Epoch 51 --> loss: 0.3787274182859961
Epoch 61 --> loss: 0.3698202961560433
Epoch 71 --> loss: 0.36133405368825416
Epoch 81 --> loss: 0.35324674815972495
Epoch 91 --> loss: 0.3455373778794243
Epoch 101 --> loss: 0.3381858665318782
Epoch 111 --> loss: 0.3311730646406067
Epoch 121 --> loss: 0.3244807463010878
Epoch 131 --> loss: 0.3180915983922856
Epoch 141 --> loss: 0.3119892029724705
Epoch 151 --> loss: 0.3061580141294807
Epoch 161 --> loss: 0.3005833305313967
Epoch 171 --> loss: 0.2952512647836322
Epoch 181 --> loss: 0.29014871054308466
Epoch 191 --> loss: 0.28526330819201257
Epoch 201 --> loss: 0.2805834097395757
Epoch 211 --> loss: 0.2760980434988459
Epoch 221 --> loss: 0.27179687898165683
Epoch 231 --> loss: 0.2676701923622161
Epoch 241 --> loss: 0.263708832782085
Epoch 251 --> loss: 0.25990418

In [9]:
acc = 0
for i in range(len(X_dev)):
    zprime = [x for x in X_dev[i]]
    zprime.insert(0,1)
    pred_val = dot_sigmoid(beta, zprime)
    if(pred_val >= 0.5):
        pred = 1
    else:
        pred = 0
    if(pred == int(Y_dev[i])):
        acc += 1
print(acc*100/len(X_dev), " %")

91.0  %


In [11]:
def hypothesis(theta, X):
    return 1 / (1 + np.exp(-(np.dot(theta, X.T)))) - 0.0000001

In [12]:
def cost(X, y, theta):
    y1 = hypothesis(X, theta)
    return -(1/len(X)) * np.sum(y*np.log(y1) + (1-y)*np.log(1-y1))

In [23]:
def gradient_descent(X, y, theta, alpha, epochs):
    m = len(X)
    for i in range(0, epochs):
        for j in range(0, 2):
            theta = pd.DataFrame(theta)
            h = hypothesis(theta.iloc[:,j], X)
            for k in range(0, theta.shape[0]):
                theta.iloc[k, j] -= (alpha/m) * np.sum((h-y.iloc[:, j])*X.iloc[:, k])
            theta = pd.DataFrame(theta)
    return theta, cost

In [24]:
theta = np.zeros([df_X.shape[0], encoder_df.shape[1]])
print(theta.shape)
theta = gradient_descent(df_X, encoder_df, theta, 0.02, 1500)

(2500, 2)


IndexError: single positional indexer is out-of-bounds

In [35]:
LRG = linear_model.LogisticRegression(
   random_state = 100,solver = 'liblinear',multi_class = 'auto'
).fit(X, Y.ravel())
LRG.score(X, Y.ravel())

0.8976

In [36]:
LRG.score(X_dev, Y_dev.ravel())

0.913