In [18]:
import numpy as np
import pandas as pd
import math
pd.options.mode.chained_assignment = None  # default='warn'

In [19]:
def Dataprocess(location):
    df = pd.read_csv(location)
    
    df.drop_duplicates(inplace=True)    # inplace is for overwriting
    
    df = df.drop(['PassengerId','Name','Ticket','Cabin'],axis=1)
    
    df.Sex[df.Sex == 'male'] = 1
    df.Sex[df.Sex == 'female'] = 0

    df.Embarked[df.Embarked == 'S'] = 1
    df.Embarked[df.Embarked == 'C'] = 0
    df.Embarked[df.Embarked == 'Q'] = 2
    
    age = df['Age']
    df['Age'] = (age - age.mean())/age.std()
    df['Age'].fillna(df['Age'].mean())

    fare = df['Fare']
    df['Fare'] = (fare - fare.mean())/fare.std()
    df['Fare'].fillna(df['Fare'].mean())

    df.ffill(inplace = True) 
    return df

In [20]:
# split into train test sets
train_loc = r"C:\Users\varun\Coding\ML\titanic\train.csv"
df_train = Dataprocess(train_loc)
X_train = np.array(df_train.drop(['Survived'],axis = 1))
y_train = np.array(df_train['Survived'])

test_loc = r"C:\Users\varun\Coding\ML\titanic\test.csv"
df_test = Dataprocess(test_loc)
X_test = np.array(df_test)


In [21]:
y_train = y_train.reshape(y_train.shape[0],1)
print('X_train: ' ,(X_train.shape))
print('Y_train: ' ,(y_train.shape))
print('X_test:  ' ,(X_test.shape))

print(X_train)

X_train:  (891, 7)
Y_train:  (891, 1)
X_test:   (418, 7)
[[ 3.          1.         -0.5300051  ...  0.         -0.50216314
   1.        ]
 [ 1.          0.          0.57143041 ...  0.          0.78640362
   0.        ]
 [ 3.          0.         -0.25464622 ...  0.         -0.48857985
   1.        ]
 ...
 [ 3.          0.         -0.73652426 ...  2.         -0.1761643
   1.        ]
 [ 1.          1.         -0.25464622 ...  0.         -0.04435613
   0.        ]
 [ 3.          1.          0.1583921  ...  0.         -0.49210144
   2.        ]]


In [22]:
def sigmoid(Z) :
    Z = np.asarray(Z, dtype = np.float64)
    return(1/(1+np.exp(-Z)))

In [23]:
def reLU(Z) :
    return np.maximum(0,Z)

In [24]:
def initialize_parameters(layers):  
    parameters = {}

    for i in range(len(layers)-1) :
        W = np.random.randn(layers[i],layers[i+1]) * 0.01
        b = np.zeros((1,layers[i+1]))

        parameters["W"+str(i+1)] = W
        parameters["b"+str(i+1)] = b
    
    return parameters

In [25]:
def activation_derivatives(layers,parameters,cache) :
    activation_diff = {}
    for i in range(len(layers) - 1) :
        a = np.where(activation_diff["A"+str(len(layers))] > 0)

In [26]:
import numpy as np
a = np.array([1,2,3,4,5,6,7,8,9,9,8,7,6,5,4,3,2,1])
b = np.where(a>5)
a[b[0]]


array([6, 7, 8, 9, 9, 8, 7, 6])

In [27]:
def cost_function(y,A_final):
    cost =  -np.mean(np.multiply(y,np.log(A_final)) + np.multiply((1-y),np.log(1-A_final)))
    cost = float(np.squeeze(cost))
    return cost

In [39]:
def forward_propagation(X, parameters, layers) :

    cache  = {}
    A = X
    for i in range(len(layers) - 1) :
        Z = np.dot(A,parameters["W"+str(i+1)]) + parameters["b"+str(i+1)]
        A = reLU(Z).reshape(A.shape[0],parameters["b"+str(i+1)].shape[1])
        
        cache["Z"+str(i+1)] = Z
        cache["A"+str(i+1)] = A
        print("Z,A shapes",Z.shape,A.shape)
    return cache

In [36]:
def backward_propagation(layers,cache,parameters,X,y,m,activation_diff):

    grads = {}
    
    dZ = np.multiply((y - cache[pos])/(cache[pos]*(1-cache[pos])),activation_diff[pos])

    dW = (1/m)*np.dot(cache[prev_pos].T,dZ)
    db = (1/m)*np.sum(dZ,axis=0)

    grads["W"+str(pos)] = dW
    grads["b"+str(pos)] = db

    for i in range(1,len(layers) - 1) :
        pos = "A"+str(len(layers)-i-1)
        prev_pos = "A"+str(len(layers)-i-2)

        dZ = np.multiply(dZ,activation_diff[pos])

        dW = (1/m)*np.dot(cache[prev_pos].T,dZ)
        db = (1/m)*np.sum(dZ,axis=0)

        grads["W"+str(pos)] = dW
        grads["b"+str(pos)] = db
    
    return grads

In [37]:
def update_parameters(layers, parameters, grads, learning_rate):

    for i in range(len(layers)):
        W = parameters["W"+str(i+1)] -learning_rate*grads["W"+str(i+1)]
        b = parameters["b"+str(i+1)] -learning_rate*grads["b"+str(i+1)]

        parameters["W"+str(i+1)] = W
        parameters["b"+str(i+1)] = b
    
    return parameters

In [42]:
iterations = 1
learning_rate = 0.03
m = X_train.shape[0]
layers = [7,4,1]

parameters = initialize_parameters(layers)
for i in range(len(layers) - 1):
    print("W",i," shape: ",parameters["W"+str(i)].shape)
    print("b",i," shape: ",parameters["b"+str(i)].shape) 
    
for i in range(0, iterations) :
    cache = forward_propagation(X_train,parameters,layers)

    activation_diff = activation_derivatives(layers,parameters,cache)
    grads = backward_propagation(layers,cache,parameters,X_train,y_train,m)
    
    parameters = update_parameters(layers, parameters, grads, learning_rate)
    if i%1000 == 0:
        print("Cost for",i,"iterations",cost_function(y_train, cache["A2"]))

print("Final cost after",i,"iterations",cost_function(y_train, cache["A2"]))

AttributeError: 'str' object has no attribute 'values'

In [None]:
result = forward_propagation(X_train,parameters)
final = result["A2"]
final[final > 0.5] = 1
final[final < 0.5] = 0

print(np.mean(final == y_train)*100)

TypeError: forward_propagation() missing 1 required positional argument: 'layers'

In [None]:
result = forward_propagation(X_test,parameters)
final = result["A2"]
final[final > 0.55] = 1
final[final < 0.55] = 0

myvar = []
for i in range(final.shape[0]) :
    myvar.append([892+i,int(final[i][0])])

df = pd.DataFrame(myvar , columns = ["PassengerId","Survived"])
df.to_csv(r'C:\Users\varun\Coding\ML\titanic\gender_submission.csv', index=False)