In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [2]:
train = pd.read_csv('train_features.csv',index_col='PassengerId')
test = pd.read_csv('test_features.csv',index_col='PassengerId')
survive = pd.read_csv('Survive_Label.csv',index_col='PassengerId')

In [3]:
from sklearn.model_selection import GridSearchCV

### Decision Tree

In [4]:
from sklearn.tree import DecisionTreeClassifier

In [5]:
tree_dict = {'criterion':['gini','entropy']}
tree_gscv = GridSearchCV(DecisionTreeClassifier(),param_grid=tree_dict,cv=10,verbose=0,n_jobs=-1)
tree_model = tree_gscv.fit(train,survive)
print(tree_gscv.best_params_)
print(tree_gscv.best_score_)

{'criterion': 'gini'}
0.7946566791510612


### Random Forest

In [6]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=0,n_jobs=-1)

In [7]:
rf_dict = {'criterion':['gini','entropy']}
rf_gscv = GridSearchCV(rf,param_grid=rf_dict,cv=10,verbose=0,n_jobs=-1)
rf_model = rf_gscv.fit(train,survive)
print(rf_gscv.best_params_)
print(rf_gscv.best_score_)

  self.best_estimator_.fit(X, y, **fit_params)


{'criterion': 'gini'}
0.812621722846442


### KNN

In [8]:
from sklearn.neighbors import KNeighborsClassifier
KNN = KNeighborsClassifier()

In [9]:
knn_dict = {'n_neighbors':[1,2,3,4,5,6,7,8,9,10]}
knn_gscv = GridSearchCV(KNN,param_grid=knn_dict,cv=10,verbose=0,n_jobs=-1)
knn_model = knn_gscv.fit(train,survive)
print(knn_gscv.best_params_)
print(knn_gscv.best_score_)

{'n_neighbors': 6}
0.828314606741573


  self.best_estimator_.fit(X, y, **fit_params)


### Logistic Regression
Note:<br>
    C = <br>
      Smaller values specify stronger regularization.
      #Regularization -> adding an additional penalty term in the error function.  The additional term controls the excessively fluctuating function such that the coefficients don't take extreme values.

In [10]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr_dict = {'penalty':['l1','l2'],'C':np.logspace(0,4,10)}
lr_gscv = GridSearchCV(lr,param_grid=lr_dict,cv=10,verbose=0,n_jobs=-1)
lr_model = lr_gscv.fit(train,survive)
print(lr_gscv.best_params_)
print(lr_gscv.best_score_)

{'C': 1.0, 'penalty': 'l2'}
0.8069912609238452


  return f(**kwargs)


### SVC 

In [11]:
from sklearn.svm import SVC
svc = SVC()

In [12]:
svc_dict = {'C':np.logspace(0,4,10)}
svc_gscv = GridSearchCV(svc,param_grid=svc_dict,cv=10,verbose=0,n_jobs=-1)
svc_model = svc_gscv.fit(train,survive)
print(svc_gscv.best_params_)
print(svc_gscv.best_score_)

{'C': 1.0}
0.8294132334581773


  return f(**kwargs)


### NaiveBayes

In [13]:
from sklearn.naive_bayes import BernoulliNB
nb = BernoulliNB()

In [14]:
nb_dict = {'fit_prior':[[0.1,0.9],[0.2,0.8],[0.3,0.7],[0.4,0.6],[0.5,0.5],
                       [0.6,0.4],[0.7,0.3],[0.8,0.2],[0.9,0.1]]}
nb_gscv = GridSearchCV(nb,param_grid=nb_dict,cv=10,verbose=0,n_jobs=-1)
nb_model = nb_gscv.fit(train,survive)
print(nb_gscv.best_params_)
print(nb_gscv.best_score_)

{'fit_prior': [0.1, 0.9]}
0.7800374531835207


  return f(**kwargs)


### Output
SVC has highest CV score; therefore, submit SVC

In [15]:
svc_predict = svc_model.predict(test)
svc_predict

array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,

In [16]:
submit_svc = pd.DataFrame({'PassengerId':test.index,'Survived':svc_predict})
submit_svc.index = submit_svc['PassengerId']
submit_svc.drop('PassengerId',axis=1,inplace=True)
submit_svc

Unnamed: 0_level_0,Survived
PassengerId,Unnamed: 1_level_1
892,0
893,1
894,0
895,0
896,1
...,...
1305,0
1306,1
1307,0
1308,0


In [17]:
submit_svc.to_csv('svc_as_final.csv')