In [167]:
# This model is written based on the DeeplearningAI courses by Andrew Ng

In [168]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [169]:
train_set = pd.read_csv('preprocessed_data.csv')
train_set

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,0,3,1,22.0,1,0,0
1,1,1,0,38.0,1,0,1
2,1,3,0,26.0,0,0,0
3,1,1,0,35.0,1,0,0
4,0,3,1,35.0,0,0,0
5,0,3,1,0.0,0,0,2
6,0,1,1,54.0,0,0,0
7,0,3,1,2.0,3,1,0
8,1,3,0,27.0,0,2,0
9,1,2,0,14.0,1,0,1


In [170]:
# define the sigmoid function
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    return s

In [171]:
# initialize the parameters with zero
def initialize_paras(dim):

    w = np.zeros((dim, 1))
    b = 0

    assert(w.shape == (dim, 1))
    assert(isinstance(b, float) or isinstance(b, int))
    
    return w, b

In [172]:
dim = 5
w, b = initialize_paras(dim)
print ("w = " + str(w))
print ("b = " + str(b))

w = [[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
b = 0


In [173]:
# Forward and backward propagation
def propagate(w, b, X, Y):
    """
    Implement the cost function and its gradient for the propagation explained above

    Arguments:
    w -- weights, a numpy array of size 6
    b -- bias, a scalar
    X -- data of size (6, number of examples)
    Y -- true "label" vector 

    """
    
    m = X.shape[1]
    
    # FORWARD PROPAGATION (FROM X TO COST)
    A = sigmoid(np.dot(w.T, X) + b)            # compute activation
    cost = -1 / m * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))         # compute cost
    
    # BACKWARD PROPAGATION (TO FIND GRAD)
    
    dw = 1 / m * np.dot(X, (A - Y).T)
    db = 1 / m * np.sum(A - Y)

    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    
    grads = {"dw": dw,
             "db": db}
    
    return grads, cost

In [174]:
# Optimize the parameters with the propagation
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    
    costs = []
    
    for i in range(num_iterations):
        
        # Cost and gradient calculation
        grads, cost = propagate(w, b, X, Y)
        
        # Retrieve derivatives from grads
        dw = grads["dw"]
        db = grads["db"]
        
        # update rule 
        w = w - learning_rate * dw
        b = b - learning_rate * db
        
        # Record the costs
        if i % 100 == 0:
            costs.append(cost)
        
        # Print the cost every 100 training examples
        if print_cost and i % 100 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
    
    params = {"w": w,
              "b": b}
    
    grads = {"dw": dw,
             "db": db}
    
    return params, grads, costs

In [175]:
def predict(w, b, X):
    
    m = X.shape[1]
    Y_prediction = np.zeros((1,m))
    w = w.reshape(X.shape[0], 1)
      
    A = sigmoid(np.dot(w.T, X) + b)

    for i in range(A.shape[1]):
        
        if A[0, i] <= 0.5:
            Y_prediction[0, i] = 0
        else:
            Y_prediction[0, i] = 1
    
    assert(Y_prediction.shape == (1, m))
    
    return Y_prediction

In [176]:
#  Builds the logistic regression model by calling the function implemented above
def train_model(X_train, Y_train, num_iterations = 2000, learning_rate = 0.5, print_cost = False):

    # initialize parameters with zeros 
    w, b = initialize_paras(X_train.shape[0])

    # Gradient descent 
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
    
    # Retrieve parameters w and b from dictionary "parameters"
    w = parameters["w"]
    b = parameters["b"]
    
    # Predict test/train set examples 
    Y_prediction_train = predict(w, b, X_train)

    # Print train/test Errors
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))

    d = {"costs": costs,
         "Y_prediction_train" : Y_prediction_train, 
         "w" : w, 
         "b" : b,
         "learning_rate" : learning_rate,
         "num_iterations": num_iterations}
    
    return d

In [177]:
X_train = train_set[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked']]
X_train = np.array(X_train)

In [178]:
Y_train = train_set[['Survived']]
Y_train = np.array(Y_train)

In [179]:
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
mms_X = MinMaxScaler()
X_train = mms_X.fit_transform(X_train)

In [180]:
# Check the dims of X_train and Y_train
X_trans = X_train.T
Y_trans = Y_train.T
print(X_trans.shape)
print(Y_trans.shape)

(6, 891)
(1, 891)


In [181]:
Y_trans

array([[0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1,
        1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
        1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
        1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,
        0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
        1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0,
        1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
        0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
        0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
        1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 

In [183]:
d = train_model(X_trans, Y_trans, num_iterations = 10000, learning_rate = 0.5, print_cost = True)

Cost after iteration 0: 0.693147
Cost after iteration 100: 0.478502
Cost after iteration 200: 0.464866
Cost after iteration 300: 0.460510
Cost after iteration 400: 0.458172
Cost after iteration 500: 0.456656
Cost after iteration 600: 0.455608
Cost after iteration 700: 0.454862
Cost after iteration 800: 0.454321
Cost after iteration 900: 0.453922
Cost after iteration 1000: 0.453625
Cost after iteration 1100: 0.453400
Cost after iteration 1200: 0.453229
Cost after iteration 1300: 0.453096
Cost after iteration 1400: 0.452992
Cost after iteration 1500: 0.452909
Cost after iteration 1600: 0.452844
Cost after iteration 1700: 0.452791
Cost after iteration 1800: 0.452749
Cost after iteration 1900: 0.452714
Cost after iteration 2000: 0.452686
Cost after iteration 2100: 0.452662
Cost after iteration 2200: 0.452643
Cost after iteration 2300: 0.452627
Cost after iteration 2400: 0.452613
Cost after iteration 2500: 0.452602
Cost after iteration 2600: 0.452592
Cost after iteration 2700: 0.452584
Cost

In [166]:
test_set = pd.read_csv('preprocessed_test_data.csv')
test_set.shape

(418, 6)

In [186]:
X_test = mms_X.fit_transform(test_set)
X_test = X_test.T
X_test.shape

  return self.partial_fit(X, y)


(6, 418)

In [187]:
w = d["w"]
b = d["b"]
    
# Predict test set 
Y_prediction= predict(w, b, X_test)

In [188]:
Y_prediction

array([[0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1.,
        0., 0., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0.,
        1., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1.,
        0., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 0., 1., 0., 1.,
        0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1.,
        1., 1., 1., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 1., 0., 1.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0.,
        0., 0., 1., 0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 1., 0., 1.,
        1., 0., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1.,
        1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 0., 1., 1., 0.,
        1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 

In [189]:
user_id = pd.read_csv('test.csv')
user_id = user_id[['PassengerId']]
user_id

Unnamed: 0,PassengerId
0,892
1,893
2,894
3,895
4,896
5,897
6,898
7,899
8,900
9,901


In [191]:
user_id['Survived'] = Y_prediction.T

In [194]:
user_id[['Survived']] = user_id[['Survived']].astype('int')
user_id

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1
5,897,0
6,898,1
7,899,0
8,900,1
9,901,0


In [195]:
user_id.to_csv('LR_hand.csv', index = False)