# Machine Learning on the Titanic Dataset

This notebook focuses on one of [Kaggle's](https://www.kaggle.com/c/titanic) "Getting Started" prediction competitions, the Titanic challenge.

In [None]:
# classifier models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# modules to handle data
import pandas as pd
import numpy as np

# visualization tools
import matplotlib.pyplot as plt
import seaborn as sns

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# environment setup: plotting defaults shared by every figure in the notebook
# apply seaborn's default theme first ...
sns.set()
# ... then layer matplotlib's 'ggplot' style sheet on top; the order matters
# because the later call wins for any setting both of them touch
plt.style.use('ggplot')
# select the interactive notebook backend for figures
# NOTE(review): this backend targets the classic Notebook UI and may not render
# in JupyterLab / Notebook 7 — confirm the front end this is meant to run in
%matplotlib notebook

## I. Data Wrangling

As in most data science projects, the raw data is likely to contain dirty or missing values, so some wrangling is needed before we can do anything else.

In [None]:
# read the training and test splits from the local Data directory
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('./Data/train.csv', './Data/test.csv')
)

In [None]:
from pycaret.classification import *

In [None]:
# Initialise the PyCaret experiment on the training frame.
#   target              - column to predict
#   ignore_features     - columns excluded from modelling
#   bin_numeric_features - numeric columns that PyCaret discretises into bins
#   session_id          - fixed random seed so the train/hold-out split, the
#                         CV folds and every model trained below are
#                         reproducible under Restart & Run All
d = setup(
    data=train,
    target='Survived',
    ignore_features=['PassengerId', 'Name'],
    bin_numeric_features=['Age', 'Fare'],
    session_id=42,
)

In [None]:
# baseline: compare the classifiers PyCaret offers on the experiment configured
# by setup(); the call's result is shown as the cell output
compare_models()

In [None]:
# train a random forest ('rf' is the PyCaret estimator ID); the fitted model is
# reused as the base estimator by the ensembling cells further down
rf=create_model('rf')

In [None]:
# hyperparameter search for the random forest, with n_iter=40 search iterations
# NOTE(review): passing the estimator ID string looks like the PyCaret 1.x
# calling convention; later releases expect a trained model object (e.g. `rf`)
# — confirm against the installed version
tuned_rf=tune_model('rf',n_iter=40)

In [None]:
# score the tuned model; no `data` argument is passed, so presumably PyCaret
# evaluates on the hold-out split it reserved in setup() — confirm in the docs
predict_model(tuned_rf)

In [None]:
# wrap the base random forest in an ensemble of 40 estimators; no `method` is
# given, so PyCaret's default ensembling method applies
# (presumably bagging, as the variable name suggests — confirm in the docs)
bagged_rf = ensemble_model(rf,n_estimators=40)

In [None]:
# same base random forest, this time wrapped in a boosting ensemble so it can
# be compared with the bagged variant
boosted_rf = ensemble_model(rf, method = 'Boosting')

In [None]:
# tune a bagged random forest in a single call (estimator ID plus
# ensemble=True / method='Bagging')
# NOTE(review): the `ensemble` and `method` keywords together with a string
# estimator ID look specific to PyCaret 1.x — confirm the pinned version
tuned_bagged_rf = tune_model('rf', ensemble=True, method='Bagging')

In [None]:
# Finalize the tuned random forest: finalize_model refits it on the complete
# dataset (training + hold-out) so it can be used on the Kaggle test set.
# finalize_model comes from the `pycaret.classification` star import.
# NOTE(review): tuned_rf is assumed to be the model meant for submission; swap
# in bagged_rf / boosted_rf / tuned_bagged_rf if one of them scored better.
final_rd = finalize_model(tuned_rf)

# Sanity check of the finalized model. The hold-out rows are now part of its
# training data, so these scores are optimistic.
predict_model(final_rd)

In [None]:
# predict on the Kaggle test frame with the finalized model; later cells expect
# the returned frame to carry the predicted class in a column named 'Label'
unseen_predictions = predict_model(final_rd, data=test)
# preview the first rows of the prediction frame
unseen_predictions.head()

In [None]:
# keep the passenger identifiers; they form the key column of the submission
passengerId = test['PassengerId']

In [None]:
# Kaggle expects the prediction column to be called 'Survived'. Rebind to the
# renamed copy instead of mutating in place, so the cell stays idempotent and
# the frame displayed by the earlier cell is not changed behind its back.
unseen_predictions = unseen_predictions.rename(columns={'Label': 'Survived'})

In [None]:
# display the prediction frame again to check the 'Survived' column after the rename
unseen_predictions

In [None]:
# raw NumPy view of the prediction frame (all columns, row order preserved)
k = unseen_predictions.to_numpy()

In [None]:
# Take the predicted class by column name rather than by position: the
# second-to-last slot only holds the prediction as long as PyCaret appends
# exactly one column after it, and a layout change would silently submit the
# wrong column. 'Survived' is the name given to the prediction column by the
# rename cell above; a missing column now fails loudly with a KeyError.
pred = unseen_predictions['Survived'].to_numpy()

In [None]:
# assemble the two-column submission frame: passenger id plus predicted class
kaggle = pd.DataFrame(dict(PassengerId=passengerId, Survived=pred))

In [None]:
# display the submission frame for a visual check before it is written to disk
kaggle

In [None]:
# save to csv; index=False keeps the row index out of the file so it contains
# only the PassengerId and Survived columns
# NOTE(review): the file name mentions "catboost", but the models trained in
# this notebook are random forests — confirm the intended output name
kaggle.to_csv('./Data/titanic_pycaret_catboost3.csv', index=False)

In [None]:
# Summary interpretation plot for the tuned random forest. The name `rd` is
# never assigned in this notebook, so tuned_rf (defined in the tuning cell) is
# used instead.
# NOTE(review): tuned_rf is assumed to be the model of interest; pass another
# tree-based model here if a different one is submitted.
interpret_model(tuned_rf, plot='summary')