In [2]:
import numpy as np
import pandas as pd

# Step 1 prepare datasets

## Titanic

In [3]:
Titanic=pd.read_csv("Titanic/tested.csv")

In [4]:
Titanic=Titanic.drop(columns=["PassengerId","Ticket","Name"])

In [5]:
Titanic.loc[pd.isna(Titanic.Cabin)==False,"Cabin"]=1

In [6]:
Titanic.loc[pd.isna(Titanic.Cabin)==True,"Cabin"]=0

In [7]:
Titanic.loc[:,"Sex"]=(Titanic.Sex=="male").astype(int)

In [8]:
Titanic.loc[pd.isna(Titanic.Age)==True,"Age"]=Titanic.Age.median()

In [9]:
Titanic=pd.get_dummies(Titanic,columns=["Embarked"])

## Diabetes

In [10]:
Diabetes=pd.read_csv("diabetes/dataset_37_diabetes.csv")

In [11]:
Diabetes.loc[:,"class"]=(Diabetes["class"]=="tested_positive").astype(int)

## Pokemon

In [12]:
Pokemon=pd.read_csv("Pokemon/pokemon.csv")

In [13]:
Pokemon=Pokemon.filter(regex='^(?!against).*')

In [14]:
Pokemon=Pokemon.drop(columns=["abilities","classfication","pokedex_number","japanese_name","name"])

In [15]:
Pokemon.loc[pd.isna(Pokemon.height_m),"height_m"]=0

In [16]:
Pokemon.loc[pd.isna(Pokemon.percentage_male),"percentage_male"]=0

In [17]:
Pokemon.loc[pd.isna(Pokemon.weight_kg),"weight_kg"]=0

In [18]:
Pokemon.capture_rate=pd.to_numeric(Pokemon.capture_rate,errors="coerce")

In [19]:
Pokemon.loc[pd.isna(Pokemon.capture_rate),"capture_rate"]=255

In [20]:
types1=pd.get_dummies(Pokemon["type1"],columns=["type1"])

In [21]:
types2=pd.get_dummies(Pokemon["type2"],columns=["type2"])

In [22]:
Pokemon=Pokemon.drop(columns=["type1","type2"])

In [23]:
Pokemon=pd.concat([Pokemon,types2+types1], axis=1)

## cancer

In [24]:
Cancer=pd.read_csv("cancer/wdbc.data",header=None)

In [25]:
Cancer.iloc[:,1]=(Cancer[1]=="M").astype(int)

In [26]:
Cancer=Cancer.drop(columns=[0])

## Bank

In [27]:
Bank=pd.read_csv("bank/bank.csv",sep=";")

In [28]:
Bank.loc[:,"default"]=(Bank.default=="yes").astype(int)

In [29]:
Bank.loc[:,"housing"]=(Bank.housing=="yes").astype(int)

In [30]:
Bank.loc[:,"loan"]=(Bank.loan=="yes").astype(int)

In [31]:
Bank.loc[:,"y"]=(Bank.y=="yes").astype(int)

In [32]:
Bank=pd.get_dummies(Bank)

In [33]:
import numpy.random as rand

In [34]:
Y1=rand.choice(2,10)

In [35]:
Y2=rand.choice(2,10)

In [36]:
def accuracy(Y,Y_hat):
    temp=Y==Y_hat
    return np.mean(temp)

In [37]:
def precision(Y,Y_hat):
    tp=np.logical_and(Y,Y_hat)
    fp=np.logical_and(np.logical_not(Y),Y_hat)
    return np.sum(tp)/(np.sum(tp)+np.sum(fp))

In [38]:
def recall(Y,Y_hat):
    tp=np.logical_and(Y,Y_hat)
    fn=np.logical_and(Y,np.logical_not(Y_hat))
    return np.sum(tp)/(np.sum(tp)+np.sum(fn))

In [39]:
def F_measure(Y,Y_hat):
    pre=precision(Y,Y_hat)
    re=recall(Y,Y_hat)
    return 2*pre*re/(pre+re)

# IWLS

In [40]:
Y=Pokemon["is_legendary"].to_numpy()

In [41]:
X=Pokemon.drop(columns=["is_legendary"]).to_numpy()

In [42]:
X

array([[4.900e+01, 5.120e+03, 7.000e+01, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [6.200e+01, 5.120e+03, 7.000e+01, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [1.000e+02, 5.120e+03, 7.000e+01, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       ...,
       [1.010e+02, 3.072e+04, 0.000e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [1.070e+02, 3.072e+04, 0.000e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [9.500e+01, 3.072e+04, 0.000e+00, ..., 0.000e+00, 1.000e+00,
        0.000e+00]])

In [43]:
Beta=np.zeros(X.shape[1])

In [44]:
W=np.ones(X.shape[0])*0.5

In [45]:
eps=10**-10

In [46]:
def prob(X,Beta):
    return np.exp(np.clip(X @ Beta,-100,100))/(1+np.exp(np.clip(X @ Beta,-100,100)))

In [87]:
Beta+=np.linalg.inv((X.T * W[:,None].T) @ X) @ X.T @ (Y-prob(X , Beta))

W=(1-prob(X , Beta)) * prob(X , Beta) + eps

In [88]:
Y_hat=prob(X , Beta)

In [89]:
prob(X , Beta)

array([3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 1.00000000e+00,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
       3.72007598e-44, 3.72007598e-44, 3.72007598e-44, 3.72007598e-44,
      

In [90]:
Pokemon[Y_hat>0.5]

Unnamed: 0,attack,base_egg_steps,base_happiness,base_total,capture_rate,defense,experience_growth,height_m,hp,percentage_male,...,ghost,grass,ground,ice,normal,poison,psychic,rock,steel,water
35,70,2560,140,483,25.0,73,800000,1.3,95,24.6,...,0,0,0,0,0,0,0,0,0,0
143,85,20480,35,580,3.0,100,1250000,1.7,90,0.0,...,0,0,0,1,0,0,0,0,0,0
144,90,20480,35,580,3.0,85,1250000,1.6,90,0.0,...,0,0,0,0,0,0,0,0,0,0
145,100,20480,35,580,3.0,90,1250000,2.0,90,0.0,...,0,0,0,0,0,0,0,0,0,0
149,150,30720,0,780,3.0,70,1250000,2.0,106,0.0,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,89,30720,0,570,30.0,71,1250000,3.8,83,0.0,...,0,0,0,0,0,0,0,0,0,0
796,101,30720,0,570,25.0,103,1250000,9.2,97,0.0,...,0,0,0,0,0,0,0,0,1,0
798,101,30720,0,570,15.0,53,1250000,5.5,223,0.0,...,0,0,0,0,0,0,0,0,0,0
799,107,30720,0,600,3.0,101,1250000,2.4,97,0.0,...,0,0,0,0,0,0,1,0,0,0


In [91]:
precision(Y,Y_hat>0.5)

0.9420289855072463

In [92]:
recall(Y,Y_hat>0.5)

0.9285714285714286

In [93]:
from sklearn.linear_model import LogisticRegression

In [94]:
reg = LogisticRegression(max_iter=400).fit(X, Y)

In [95]:
Y_hat=reg.predict(X)

In [96]:
precision(Y,Y_hat)

0.96875

In [97]:
recall(Y,Y_hat)

0.8857142857142857

## GD

In [98]:
def MSE(Y,Y_hat):
    return np.sum((Y-Y_hat)**2)

def MSE_grad(Y,Y_hat):
     return Y_hat-Y

def sigmoid(x):
    return(1/(1 + np.exp(np.clip(-x,-100,100))))

def sigmoid_grad(x):
    return(sigmoid(x)*(1-sigmoid(x)))

def cross_entropy(targets,predictions,  epsilon=1e-12):
    predictions = np.clip(predictions, epsilon, 1. - epsilon)
    ce = abs(np.sum(targets*np.log(predictions)))
    return ce

def cross_entropy_grad(Y,Y_hat,eps=10**-10):
    Y_hat=Y_hat+eps
    return (Y_hat-eps-Y)/Y_hat/(1-Y_hat)

In [99]:
X=(X-X.mean(0))/X.std(0)

In [100]:
Beta=np.random.sample(X.shape[1])

In [101]:
Beta=np.zeros(X.shape[1])

In [102]:
MAX_EPOCH=400
alpha=0.01

In [103]:
bias=0

In [104]:
for epoch in range(MAX_EPOCH):
    pred=sigmoid(X @ Beta)
    error=cross_entropy(Y,pred)
    print(error)
    Beta=Beta - alpha * (X*(cross_entropy_grad(Y,pred) * sigmoid_grad(X @ Beta+bias)).reshape(-1,1)).mean(axis=0)
    bias=bias - alpha * ((cross_entropy_grad(Y,pred) * sigmoid_grad(X @ Beta+bias)).reshape(-1,1)).mean(axis=0)

48.52030263919617
47.674729559811645
46.85147965989364
46.04994725074782
45.269536913880344
44.509664007003565
43.76975509677563
43.04924832236708
42.34759369406191
41.66425333115798
40.9987016434374
40.35042546044038
39.71892411270007
39.103709468989734
38.5043059335006
37.920250406716335
37.351092213580884
36.79639300237643
36.255726617540205
35.728678949456096
35.21484776406318
34.71384251492943
34.22528414024836
33.74880484702976
33.28404788457533
32.83066730915568
32.38832774163933
31.956704119665694
31.53548144580454
31.12435453300329
30.72302774849151
30.331214757188018
29.948638265541796
29.57502976663074
29.210129287244364
28.853685137586012
28.505453664146685
28.165199006226988
27.83269285651416
27.507714226058056
27.190049213933023
26.879490781820742
26.57583853370284
26.27889850081009
25.988482931937803
25.704410089203588
25.42650404929406
25.15459451022106
24.88851660358441
24.628110712318538
24.373222293882222
24.12370170883563
23.87940405473543
23.640189005267793
23.4059

In [105]:
Y_hat=prob(X , Beta)

In [106]:
prob(X , Beta)

array([0.32394882, 0.35887381, 0.45757952, 0.33487012, 0.37593215,
       0.50620352, 0.31195375, 0.34922007, 0.45394251, 0.31161419,
       0.31475921, 0.42731784, 0.30905873, 0.31182283, 0.44291298,
       0.32255787, 0.37054998, 0.48982425, 0.2730705 , 0.32192793,
       0.32313513, 0.4058342 , 0.36009341, 0.44388718, 0.35726251,
       0.44348541, 0.30173937, 0.35819048, 0.44543697, 0.49038495,
       0.54008981, 0.27365153, 0.31083833, 0.35784376, 0.29618095,
       0.36829017, 0.40758331, 0.49877927, 0.34343353, 0.41739794,
       0.33311843, 0.43160449, 0.3809047 , 0.4144086 , 0.45950926,
       0.36653442, 0.41657849, 0.37277674, 0.45128615, 0.32982878,
       0.38989551, 0.31630327, 0.37743372, 0.38220674, 0.47281639,
       0.39003862, 0.45693771, 0.39131115, 0.50766092, 0.38653942,
       0.41918031, 0.47849734, 0.41497818, 0.46351445, 0.55833858,
       0.33652309, 0.38568123, 0.43626304, 0.37826934, 0.41987101,
       0.46710295, 0.4251661 , 0.50769883, 0.2962714 , 0.33224

In [107]:
Pokemon[Y_hat>0.5]

Unnamed: 0,attack,base_egg_steps,base_happiness,base_total,capture_rate,defense,experience_growth,height_m,hp,percentage_male,...,ghost,grass,ground,ice,normal,poison,psychic,rock,steel,water
5,104,5120,70,634,45.0,78,1059860,1.7,78,88.1,...,0,0,0,0,0,0,0,0,0,0
30,92,5120,70,505,45.0,87,1059860,1.3,90,0.0,...,0,0,1,0,0,1,0,0,0,0
58,110,5120,70,555,75.0,80,1250000,1.9,90,75.4,...,0,0,0,0,0,0,0,0,0,0
64,50,5120,70,600,50.0,65,1059860,1.5,55,75.4,...,0,0,0,0,0,0,1,0,0,0
72,70,5120,70,515,60.0,65,1250000,1.6,80,50.0,...,0,0,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
796,101,30720,0,570,25.0,103,1250000,9.2,97,0.0,...,0,0,0,0,0,0,0,0,1,0
797,181,30720,0,570,255.0,131,1250000,0.3,59,0.0,...,0,1,0,0,0,0,0,0,1,0
798,101,30720,0,570,15.0,53,1250000,5.5,223,0.0,...,0,0,0,0,0,0,0,0,0,0
799,107,30720,0,600,3.0,101,1250000,2.4,97,0.0,...,0,0,0,0,0,0,1,0,0,0


In [108]:
precision(Y,Y_hat>0.5)

0.29045643153526973

In [109]:
recall(Y,Y_hat>0.5)

1.0