## Reading train.csv

In [1]:
import csv as csv
import numpy as np

In [5]:
# Load train.csv into memory: `header` holds the column names from the first
# line and `data` is a 2-D NumPy array of strings, one row per remaining line.
# The file is opened in a `with` block so the handle is closed as soon as the
# rows have been read, and with newline='' as the csv module requires so that
# newlines embedded in quoted fields are parsed correctly.
with open('train.csv', 'rt', newline='') as train_file:
    csv_file_object = csv.reader(train_file)
    header = next(csv_file_object)  # consume the header line before the rows
    data = np.array(list(csv_file_object))

In [10]:
# Show the column names taken from the first line of the csv file.
print(header)

['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']


In [8]:
# Show the first data row; every field is still a string at this point.
print(data[0])

['1' '0' '3' 'Braund, Mr. Owen Harris' 'male' '22' '1' '0' 'A/5 21171'
 '7.25' '' 'S']


In [9]:
# Row 0, column 3: per the header printed above, column 3 is 'Name'.
print(data[0,3])

Braund, Mr. Owen Harris


## Playing with data

Now if you want to select a specific column of data, say, the gender column, you can just type data[0::,4], remembering that "0::" means all rows (from start to end), and that Python starts indices from 0 (not 1). You should be aware that the csv reader works with strings by default, so you will need to convert to floats in order to do numerical calculations. For example, you can turn the Pclass variable into floats by using data[0::,2].astype(float) (older tutorials write np.float, but that alias has been removed from recent NumPy releases). Applying the same conversion to the Survived column (index 1), we can calculate the proportion of survivors on the Titanic: 

In [11]:
# Overall survival rate. Column 1 is 'Survived' (see the printed header); the
# csv reader yields strings, so it is cast to float before any arithmetic.
# The builtin `float` is used because the `np.float` alias was deprecated in
# NumPy 1.20 and removed in 1.24; both produce a float64 array.
survived_flags = data[:, 1].astype(float)
number_passengers = np.size(survived_flags)   # one entry per passenger
number_survived = np.sum(survived_flags)      # flags are 0/1, so the sum counts survivors
proportion_survivors = number_survived / number_passengers

In [12]:
# Boolean masks over the rows of `data`, built from column 4 ('Sex' in the
# printed header): one selects the rows equal to 'female', the other selects
# every remaining row.
sex_column = data[:, 4]
women_only_stats = (sex_column == 'female')
men_only_stats = (sex_column != 'female')

In [14]:
# Display the boolean mask: True on rows whose column 4 equals 'female'.
women_only_stats

array([False,  True,  True,  True, False, False, False, False,  True,
        True,  True,  True, False, False,  True,  True, False, False,
        True,  True, False, False,  True, False,  True,  True, False,
       False,  True, False, False,  True,  True, False, False, False,
       False, False,  True,  True,  True,  True, False,  True,  True,
       False, False,  True, False,  True, False, False,  True,  True,
       False, False,  True, False,  True, False, False,  True, False,
       False, False, False,  True, False,  True, False, False,  True,
       False, False, False, False, False, False, False,  True, False,
       False,  True, False,  True,  True, False, False,  True, False,
       False, False, False, False, False, False, False, False,  True,
       False,  True, False, False, False, False, False,  True, False,
       False,  True, False,  True, False,  True,  True, False, False,
       False, False,  True, False, False, False,  True, False, False,
       False, False,

In [15]:
# Survival flags (column 1) for each group, selected with the boolean masks
# and converted from csv strings to floats. The builtin `float` replaces the
# `np.float` alias, which was deprecated in NumPy 1.20 and removed in 1.24.
women_onboard = data[women_only_stats, 1].astype(float)
# NOTE: `men_onbaord` is a misspelling of "onboard"; the name is left unchanged
# because the cell that computes the survival proportions reads it.
men_onbaord = data[men_only_stats, 1].astype(float)

In [16]:
# Display the float survival flags selected by the 'female' mask.
women_onboard

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  0.,  1.,  1.,  0.,
        1.,  1.,  1.,  1.,  0.,  1.,  0.,  0.,  1.,  1.,  1.,  0.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,
        0.,  1.,  1.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  1.,  1.,  0.,
        1.,  1.,  0.,  1.,  1.,  1.,  1.,  0.,  1.,  0.,  0.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  0.,  0.,  1.,  1.,  1.,  1.,  1.,  0.,
        1.,  1.,  0.,  1.,  0.,  1.,  0.,  1.,  0.,  0.,  1.,  1.,  1.,
        1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,  1.,
        0.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  0.,  1.,  1.,  0.,  1.,  1.,  1.,  1.,  0.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  0.,  0.,  0.,
        1.,  0.,  1.,  1.,  0.,  0.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  1

In [17]:
# Survival rate within each group: the flags are 0.0 / 1.0, so their sum is
# the number of survivors and the array size is the number of passengers.
proportion_women_survived = women_onboard.sum() / women_onboard.size
proportion_men_survived = men_onbaord.sum() / men_onbaord.size

In [18]:
# Report both survival rates, one line per group.
for message_template, proportion in (
    ('Proportion of female survivors: %s', proportion_women_survived),
    ('Proportion of male survivors: %s', proportion_men_survived),
):
    print(message_template % proportion)

Proportion of female survivors: 0.742038216561
Proportion of male survivors: 0.188908145581


## Reading test data & writing gender model

In [20]:
# Open test.csv for row-by-row reading. The handle stays open on purpose: a
# later cell iterates over `test_file_object` and then closes `test_file`.
# newline='' is what the csv module requires of file objects it reads, so that
# newlines embedded in quoted fields are handled correctly.
test_file = open('test.csv', 'rt', newline='')
test_file_object = csv.reader(test_file)
# Skip the header line. NOTE: this rebinds `header`, which until now held the
# column names of train.csv.
header = next(test_file_object)

In [21]:
# Create the submission file. It must be opened with newline='' because
# csv.writer emits its own '\r\n' line endings; without it, platforms that
# translate '\n' on write (e.g. Windows) produce a blank line after every row.
# The handle is closed by the cell that writes the predictions.
prediction_file = open('genderbasedmodel.csv', 'wt', newline='')
prediction_file_object = csv.writer(prediction_file)

In [22]:
# Gender-based model: write one (PassengerId, Survived) row per test
# passenger, predicting '1' when field 3 of the row is 'female' and '0'
# otherwise.
# NOTE(review): field 3 is presumably the Sex column of test.csv (one position
# earlier than in train.csv if test.csv has no Survived column) - confirm
# against the test header.
try:
    prediction_file_object.writerow(['PassengerId','Survived'])
    for row in test_file_object:
        predicted_survival = '1' if row[3] == 'female' else '0'
        prediction_file_object.writerow([row[0], predicted_survival])
finally:
    # Release both handles even if a row is malformed or a write fails, so the
    # output is flushed and nothing stays open in the kernel.
    test_file.close()
    prediction_file.close()

## Pythonising the second submission