In [42]:
import pandas as pd
import numpy as np
from collections import Counter

In [43]:
# Load the three CSV inputs. Every value is read as text (dtype=str), and
# `usecols` skips column 0 of the two "processed" files.
train_data = pd.read_csv(
    'train_processed.csv',
    usecols=range(1, 10),
    dtype=str,
)
actual = pd.read_csv(
    'gender_submission.csv',
    usecols=[1],
    dtype=str,
)
test_data = pd.read_csv(
    'test_processed.csv',
    usecols=range(1, 9),
    dtype=str,
)

# Split the training frame into the label column and everything else.
train_Y = train_data['Survived']
train_X = train_data.drop(columns=['Survived'])

# The test features are the test frame as loaded; the reference labels are
# the single column read from gender_submission.csv.
test_X, test_Y = test_data, actual

In [44]:
# one-hot encoding
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder

# Fit the encoder on the training features ONLY, then reuse the fitted
# encoder for the test features. Re-fitting on the test set would derive a
# separate category -> column mapping, so column k of the test matrix need
# not mean the same thing as column k the weights were trained on.
# handle_unknown='ignore' encodes a category that never occurred in training
# as all zeros instead of raising.
enc = OneHotEncoder(handle_unknown='ignore')
enc_train_X = enc.fit_transform(train_X)
enc_test_X = enc.transform(test_X)

# Same rule for scaling: learn the per-column range from the training matrix
# and apply that exact mapping to the test matrix.
min_max_scaler = MinMaxScaler(feature_range=(-1., 1.))
enc_train_X = min_max_scaler.fit_transform(enc_train_X.toarray())
enc_test_X = min_max_scaler.transform(enc_test_X.toarray())

enc_train_X = pd.DataFrame(enc_train_X)
enc_test_X = pd.DataFrame(enc_test_X)
print(enc_test_X.head())

    0    1    2    3    4    5    6    7    8    9  ...    16   17   18   19  \
0 -1.0 -1.0  1.0  1.0 -1.0 -1.0 -1.0  1.0 -1.0 -1.0 ...  -1.0 -1.0 -1.0  1.0   
1 -1.0 -1.0  1.0 -1.0  1.0 -1.0 -1.0  1.0 -1.0 -1.0 ...  -1.0 -1.0 -1.0 -1.0   
2 -1.0  1.0 -1.0  1.0 -1.0 -1.0 -1.0 -1.0  1.0 -1.0 ...  -1.0 -1.0 -1.0  1.0   
3 -1.0 -1.0  1.0  1.0 -1.0 -1.0  1.0 -1.0 -1.0 -1.0 ...  -1.0 -1.0 -1.0 -1.0   
4 -1.0 -1.0  1.0 -1.0  1.0 -1.0  1.0 -1.0 -1.0 -1.0 ...  -1.0 -1.0 -1.0 -1.0   

    20   21   22   23   24   25  
0 -1.0  1.0 -1.0 -1.0 -1.0 -1.0  
1  1.0 -1.0 -1.0  1.0 -1.0 -1.0  
2 -1.0  1.0 -1.0 -1.0 -1.0 -1.0  
3  1.0  1.0 -1.0 -1.0 -1.0 -1.0  
4  1.0 -1.0 -1.0  1.0 -1.0 -1.0  

[5 rows x 26 columns]


In [45]:
# make prediction for each row
def predict(row, weights, bias):
    """Classify a single sample with a step (threshold) activation.

    Parameters
    ----------
    row : array-like of numbers
        Feature values of one sample.
    weights : array-like of numbers
        One weight per feature; must have the same length as ``row``.
    bias : float
        Constant offset added to the weighted sum.

    Returns
    -------
    float
        ``1.0`` when ``bias + dot(row, weights)`` is at least ``0.0``,
        otherwise ``0.0``.
    """
    # np.dot computes the weighted sum in one vectorised call (the builtin
    # sum() walks the product array element by element in Python) and it
    # raises on a length mismatch instead of silently broadcasting.
    activation = bias + np.dot(row, weights)
    return 1.0 if activation >= 0.0 else 0.0

In [56]:
def train(train_data, n_epoch, learning_rate, seed=None):
    """Learn perceptron weights and bias with per-sample updates.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Every column except the last is used as a feature; the last column
        is the target. All values must be convertible to float.
    n_epoch : int
        Maximum number of passes over ``train_data``.
    learning_rate : float
        Step size applied to every update.
    seed : int or None, optional
        When given, the initial weights are drawn from a private
        ``np.random.RandomState(seed)`` so the run is repeatable. ``None``
        (the default) draws from NumPy's global random state, exactly as
        before this parameter existed.

    Returns
    -------
    tuple
        ``(weights, bias)`` where ``weights`` is a 1-D array with one entry
        per feature column and ``bias`` is a float.
    """
    n_features = train_data.shape[1] - 1
    rng = np.random if seed is None else np.random.RandomState(seed)
    weights = rng.uniform(-1, 1, n_features)
    bias = 0.5
    # Convert once; the same float matrix is walked on every epoch.
    samples = train_data.values.astype('float')
    for epoch in range(n_epoch):
        error_sum = 0.0
        for row in samples:
            features, target = row[:-1], row[-1]
            pred = predict(features, weights, bias)
            # The sign matters: the error is (target - prediction). Writing
            # it the other way round pushes the weights away from the
            # target and the model fails to learn.
            error = target - pred
            error_sum += error**2
            bias += learning_rate*error
            weights += learning_rate*error*features
        if error_sum == 0.0:
            # Every sample was classified correctly, so no update happened
            # during this pass. Further passes would be identical, hence
            # stopping here returns the same weights and bias.
            break
    return weights, bias

In [52]:
def perceptron(train_data, test_X, learning_rate, n_epoch):
    """Train on ``train_data`` and return one prediction per row of ``test_X``.

    The model parameters come from ``train``; each row of ``test_X`` (cast to
    float) is then classified with ``predict``. The result is a plain list in
    the same order as the rows of ``test_X``.
    """
    weights, bias = train(train_data, n_epoch, learning_rate)
    test_rows = test_X.values.astype('float')
    return [predict(sample, weights, bias) for sample in test_rows]

In [53]:
def accuracy(actual, predicted):
    """Return the percentage (0-100) of positions where the two sequences agree.

    Both arguments are indexed by integer position ``0 .. len(actual) - 1``;
    the denominator is ``len(actual)``.
    """
    total = len(actual)
    hits = sum(1 for idx in range(total) if actual[idx] == predicted[idx])
    return hits / float(total) * 100.0

In [54]:
def evaluate(train_data, test_X, test_Y, learning_rate, n_epoch):
    """Train a perceptron, score it against ``test_Y`` and return the score.

    The predictions for ``test_X`` are printed as a side effect; the return
    value is whatever ``accuracy`` reports for ``test_Y`` versus those
    predictions.
    """
    guesses = perceptron(train_data, test_X, learning_rate, n_epoch)
    score = accuracy(test_Y, guesses)
    print(guesses)
    return score

In [55]:
best_score = 0

# Inputs that do not change between runs are built once, outside the loop.
# train_data becomes the encoded features with the label as the last column,
# which is the layout train() expects.
train_data = pd.concat([enc_train_X, train_Y], axis=1)
# Flatten the labels to 1-D so accuracy() compares scalar with scalar rather
# than relying on the truthiness of one-element arrays.
actual = test_Y.values.astype('float').ravel()
learning_rate = 0.01
n_epoch = 100

for i in range(1):
    # enc_test_X is passed directly instead of rebinding test_X: keeping
    # test_X as the raw frame means the encoding cell can be re-run safely.
    score = evaluate(train_data, enc_test_X, actual, learning_rate, n_epoch)
    if score > best_score:
        best_score = score
print('The accuracy for SGD is %f' % best_score)

[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0,

In [28]:
# Scratch check: draw one uniform value in [-1, 1) for every column of
# train_data except the last, and show the result.
weights = np.random.uniform(low=-1, high=1, size=train_data.shape[1] - 1)
print(weights)

[ 0.12672268 -0.44328229  0.71540003 -0.00742846 -0.44266876  0.1963046
 -0.65580191  0.10059205  0.89203522 -0.07477978  0.40331069 -0.42932525
 -0.81385873 -0.40922669  0.05293893 -0.1468926   0.8055561  -0.60680218
 -0.76531256 -0.13333172  0.98695769 -0.03569291 -0.36503539 -0.18148858
 -0.12194093  0.70323212]


In [91]:
# Scratch check: 20 uniform draws from [-1, 1).
np.random.uniform(low=-1, high=1, size=20)

array([-0.76918488, -0.68147978,  0.35146723, -0.34726379, -0.24723325,
       -0.32691635, -0.41369798,  0.06263327, -0.92821683, -0.31036156,
       -0.8073101 ,  0.66493366,  0.37104266,  0.68949669,  0.48165257,
        0.47312651,  0.33912385, -0.71928204,  0.02872441,  0.60868847])

In [5]:
# Scratch check: multiplying two arrays is elementwise, not a dot product.
np.multiply(np.array([1, 2]), np.array([2, 1]))

array([2, 2])