In [1]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.special import expit
from sklearn.model_selection import train_test_split

In [2]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), evaluated element-wise.

    z may be a scalar or a NumPy array; the result has the same shape and
    every value lies between 0 and 1.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [45]:
# thetas must stay the first parameter: scipy.optimize.minimize passes the
# vector being optimised first and forwards the `args` tuple after it.
def cost_function(thetas, X, y):
    """Mean binary cross-entropy of a logistic-regression model.

    Parameters
    ----------
    thetas : array-like, n values
        Model coefficients; flattened to 1-D before use.
    X : ndarray, shape (m, n)
        Design matrix (one row per sample).
    y : array-like, m values
        0/1 labels; accepted as a 1-D vector or an (m, 1) column.

    Returns
    -------
    float
        The average cost, or ``np.inf`` if the computation yields NaN
        (e.g. NaN in the inputs or an empty X).
    """
    theta_vec = np.asarray(thetas, dtype=float).ravel()
    labels = np.asarray(y, dtype=float).ravel()
    m = X.shape[0]
    z = np.asarray(X.dot(theta_vec), dtype=float).ravel()
    # Work from the logits instead of log(sigmoid(z)):
    #   -log(h)     = log(1 + e^-z) = logaddexp(0, -z)
    #   -log(1 - h) = log(1 + e^z)  = logaddexp(0,  z)
    # This stays finite when h rounds to exactly 0 or 1, where the naive
    # form produces log(0) * 0 = NaN even for a correctly classified sample.
    per_sample = labels * np.logaddexp(0, -z) + (1 - labels) * np.logaddexp(0, z)
    J = per_sample.sum() / m
    # Keep the original contract: an undefined cost is reported as +inf.
    if np.isnan(J):
        return np.inf
    return J

In [4]:
# Labelled Titanic training data, read straight from the hosted CSV.
TRAIN_CSV_URL = "https://storage.googleapis.com/kaggle_datasets/Titanic-Machine-Learning-from-Disaster/train.csv"
labeled = pd.read_csv(TRAIN_CSV_URL)

In [6]:
# Age is used as a model feature, so keep only the rows where it is present.
labeled = labeled.dropna(subset=["Age"])

In [7]:
# Preview the first rows only; displaying the whole frame adds nothing here.
labeled.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.0750,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C
10,11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7000,G6,S


In [8]:
def gradient(thetas, X, y):
    """Gradient of the mean logistic-regression cost with respect to thetas.

    Parameters
    ----------
    thetas : array-like, n values
        Model coefficients; flattened to 1-D before use.
    X : ndarray, shape (m, n)
        Design matrix (one row per sample).
    y : array-like, m values
        0/1 labels; accepted as a 1-D vector or an (m, 1) column.

    Returns
    -------
    ndarray, shape (n,)
        (1/m) * X^T (sigmoid(X @ thetas) - y), the flat vector expected by
        scipy.optimize.minimize for its `jac` callback.
    """
    theta_vec = np.asarray(thetas, dtype=float).ravel()
    # Flatten the labels so that `h - labels` is always element-wise; with a
    # column-vector h and a 1-D y the subtraction would broadcast to (m, m).
    labels = np.asarray(y, dtype=float).ravel()
    m = labels.size
    # expit is the logistic sigmoid, evaluated without overflow for large |z|.
    h = expit(X.dot(theta_vec))
    grad = X.T.dot(h - labels) / m
    return np.asarray(grad).ravel()

In [9]:
# Hold out 30% of the labelled rows for validation; the fixed seed keeps the
# split reproducible across runs.
train, validation = train_test_split(
    labeled,
    test_size=0.3,
    random_state=123,
)

In [24]:
# Training design matrix: an intercept column of ones followed by Age and Fare.
ones = np.ones((len(train), 1))
X_train = np.hstack((ones, train[["Age", "Fare"]].to_numpy()))
# Labels as an (m, 1) column vector.
y_train = train["Survived"].to_numpy().reshape(-1, 1)

In [30]:
# Start the optimiser from all-zero coefficients, one per design-matrix column.
n_params = X_train.shape[1]
initial_thetas = np.zeros(n_params)

In [37]:
# Show the starting coefficient vector.
initial_thetas

array([0., 0., 0.])

In [44]:
# Sanity check: with all-zero coefficients every prediction is 0.5, so the
# cost should equal ln(2), about 0.693.
cost_function(initial_thetas,X_train,y_train)

array([0.69314718])

In [46]:
# Fit the coefficients by minimising the cost with its analytic gradient.
# method=None leaves the solver choice to SciPy; for a problem without bounds
# or constraints that is BFGS.
res = minimize(
    fun=cost_function,
    x0=initial_thetas,
    args=(X_train, y_train),
    method=None,
    jac=gradient,
    options={"maxiter": 400},
)

  """
  """


In [47]:
# Optimisation report: final cost (`fun`), fitted coefficients (`x`) and
# convergence status.
res

      fun: 0.6249923100906284
 hess_inv: array([[ 2.57707497e+01, -6.39438923e-01, -9.67852173e-02],
       [-6.39438923e-01,  2.32935569e-02, -1.31976410e-03],
       [-9.67852173e-02, -1.31976410e-03,  4.31442824e-03]])
      jac: array([5.79538929e-08, 1.65741052e-06, 3.06075254e-06])
  message: 'Optimization terminated successfully.'
     nfev: 23
      nit: 14
     njev: 18
   status: 0
  success: True
        x: array([-0.62176132, -0.01014703,  0.01692017])

In [50]:
# Fitted coefficients as a column vector, ready for X.dot(theta).
theta = np.reshape(res.x, (-1, 1))

In [55]:
# Validation features (Age, Fare) as a plain array; no intercept column yet.
X_valid = validation[["Age", "Fare"]].to_numpy()

In [56]:
# Inspect the validation features (columns: Age, Fare).
X_valid

array([[ 18.    ,  23.    ],
       [ 60.    ,  26.55  ],
       [ 31.    ,   8.6833],
       [ 15.    ,   7.225 ],
       [ 18.    , 108.9   ],
       [ 40.5   ,   7.75  ],
       [ 22.    ,   7.125 ],
       [ 27.    ,  13.    ],
       [ 58.    ,  29.7   ],
       [ 40.    ,  13.    ],
       [ 26.    ,   7.8542],
       [ 19.    ,   0.    ],
       [ 19.    ,   7.65  ],
       [ 42.    ,  26.    ],
       [ 51.    ,  77.9583],
       [ 31.    ,  10.5   ],
       [ 54.    ,  26.    ],
       [ 32.    ,  56.4958],
       [ 33.    ,  26.    ],
       [ 17.    ,   8.6625],
       [ 32.    ,  26.    ],
       [ 50.    ,  26.    ],
       [ 28.    ,  26.55  ],
       [  0.75  ,  19.2583],
       [ 14.    ,   7.8542],
       [ 35.    ,  21.    ],
       [ 23.    ,   7.8958],
       [  0.83  ,  18.75  ],
       [ 36.    ,  27.75  ],
       [ 20.    ,   8.05  ],
       [ 28.    ,  13.    ],
       [ 34.5   ,   6.4375],
       [ 22.    , 151.55  ],
       [ 20.5   ,   7.25  ],
       [  9.  

In [62]:
# Validation design matrix: intercept column of ones, then Age and Fare.
# Built from `validation` rather than from X_valid itself so that re-running
# this cell cannot stack a second column of ones onto the matrix.
X_valid = np.hstack(
    (
        np.ones((len(validation), 1)),
        validation[["Age", "Fare"]].to_numpy(),
    )
)

In [69]:
# Model output for each validation row: sigmoid of the linear score, i.e. a
# value between 0 and 1 per row.
sigmoid(X_valid.dot(theta))

array([[0.39765538],
       [0.31402844],
       [0.3122983 ],
       [0.34260237],
       [0.73850225],
       [0.28872495],
       [0.32641606],
       [0.33720629],
       [0.33009398],
       [0.30838503],
       [0.32023551],
       [0.30692073],
       [0.33512051],
       [0.35251278],
       [0.54483121],
       [0.31893781],
       [0.32524377],
       [0.50236303],
       [0.3736268 ],
       [0.34351031],
       [0.37600454],
       [0.33421364],
       [0.38778139],
       [0.42469773],
       [0.34730075],
       [0.34942132],
       [0.32705291],
       [0.42239967],
       [0.37343239],
       [0.33436805],
       [0.3349422 ],
       [0.29673167],
       [0.84803292],
       [0.33023902],
       [0.45415257],
       [0.50236051],
       [0.30712987],
       [0.33116012],
       [0.3132661 ],
       [0.37283162],
       [0.30967339],
       [0.29729251],
       [0.34594919],
       [0.61606249],
       [0.32151937],
       [0.33667858],
       [0.50990141],
       [0.449