Just following PyCaret CLF101 Tutorial

Link: https://www.pycaret.org/tutorials/html/CLF101.html

# Library

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pycaret as pyc
from sklearn.model_selection import train_test_split
import statsmodels.api as sm

%matplotlib inline
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.options.display.float_format = "{:,.2f}".format

# Data

In [2]:
train = pd.read_csv('train.csv')

In [3]:
train.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True


In [4]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8693 entries, 0 to 8692
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   PassengerId   8693 non-null   object 
 1   HomePlanet    8492 non-null   object 
 2   CryoSleep     8476 non-null   object 
 3   Cabin         8494 non-null   object 
 4   Destination   8511 non-null   object 
 5   Age           8514 non-null   float64
 6   VIP           8490 non-null   object 
 7   RoomService   8512 non-null   float64
 8   FoodCourt     8510 non-null   float64
 9   ShoppingMall  8485 non-null   float64
 10  Spa           8510 non-null   float64
 11  VRDeck        8505 non-null   float64
 12  Name          8493 non-null   object 
 13  Transported   8693 non-null   bool   
dtypes: bool(1), float64(6), object(7)
memory usage: 891.5+ KB


## Missing/Error Data (_NA_) 

In [5]:
train.isna().sum()

PassengerId       0
HomePlanet      201
CryoSleep       217
Cabin           199
Destination     182
Age             179
VIP             203
RoomService     181
FoodCourt       183
ShoppingMall    208
Spa             183
VRDeck          188
Name            200
Transported       0
dtype: int64

In [6]:
train._get_numeric_data().fillna(0, inplace=True)
train.isna().sum()

PassengerId       0
HomePlanet      201
CryoSleep       217
Cabin           199
Destination     182
Age               0
VIP             203
RoomService       0
FoodCourt         0
ShoppingMall      0
Spa               0
VRDeck            0
Name            200
Transported       0
dtype: int64

### VIP

In [7]:
train.VIP.fillna(False, inplace=True)
train.isna().sum()

PassengerId       0
HomePlanet      201
CryoSleep       217
Cabin           199
Destination     182
Age               0
VIP               0
RoomService       0
FoodCourt         0
ShoppingMall      0
Spa               0
VRDeck            0
Name            200
Transported       0
dtype: int64

### HomePlanet

In [8]:
pd.crosstab(index=train.HomePlanet, columns="ID")

col_0,ID
HomePlanet,Unnamed: 1_level_1
Earth,4602
Europa,2131
Mars,1759


In [9]:
train.HomePlanet.fillna('Earth',inplace=True)
pd.crosstab(index=train.HomePlanet, columns="ID")

col_0,ID
HomePlanet,Unnamed: 1_level_1
Earth,4803
Europa,2131
Mars,1759


In [10]:
train.HomePlanet = np.where(train.HomePlanet=="Europa","Earth",train.HomePlanet)
pd.crosstab(index=train.HomePlanet, columns="ID")

col_0,ID
HomePlanet,Unnamed: 1_level_1
Earth,6934
Mars,1759


In [11]:
train = pd.get_dummies(train, columns=['HomePlanet'])
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8693 entries, 0 to 8692
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   PassengerId       8693 non-null   object 
 1   CryoSleep         8476 non-null   object 
 2   Cabin             8494 non-null   object 
 3   Destination       8511 non-null   object 
 4   Age               8693 non-null   float64
 5   VIP               8693 non-null   bool   
 6   RoomService       8693 non-null   float64
 7   FoodCourt         8693 non-null   float64
 8   ShoppingMall      8693 non-null   float64
 9   Spa               8693 non-null   float64
 10  VRDeck            8693 non-null   float64
 11  Name              8493 non-null   object 
 12  Transported       8693 non-null   bool   
 13  HomePlanet_Earth  8693 non-null   bool   
 14  HomePlanet_Mars   8693 non-null   bool   
dtypes: bool(4), float64(6), object(5)
memory usage: 781.1+ KB


### CryoSleep

In [12]:
pd.crosstab(index=train.CryoSleep, columns="ID")

col_0,ID
CryoSleep,Unnamed: 1_level_1
False,5439
True,3037


In [13]:
train.CryoSleep.fillna(False,inplace=True)
pd.crosstab(index=train.CryoSleep, columns="ID")

col_0,ID
CryoSleep,Unnamed: 1_level_1
False,5656
True,3037


### Cabin

In [14]:
pd.crosstab(index=train.Cabin, columns="ID")

col_0,ID
Cabin,Unnamed: 1_level_1
A/0/P,2
A/0/S,2
A/1/S,3
A/10/P,1
A/10/S,1
A/100/S,2
A/101/S,2
A/102/S,1
A/103/S,3
A/104/S,3


### Destination

In [15]:
pd.crosstab(index=train.Destination, columns="ID")

col_0,ID
Destination,Unnamed: 1_level_1
55 Cancri e,1800
PSO J318.5-22,796
TRAPPIST-1e,5915


## Duplicate

In [16]:
train.duplicated().sum()

0

# Model

## Target Data

In [17]:
pd.crosstab(index=train.Transported, columns="C")

col_0,C
Transported,Unnamed: 1_level_1
False,4315
True,4378


In [18]:
data_train = train.drop(columns=['Cabin','Destination','Name','PassengerId'])
data_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8693 entries, 0 to 8692
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   CryoSleep         8693 non-null   bool   
 1   Age               8693 non-null   float64
 2   VIP               8693 non-null   bool   
 3   RoomService       8693 non-null   float64
 4   FoodCourt         8693 non-null   float64
 5   ShoppingMall      8693 non-null   float64
 6   Spa               8693 non-null   float64
 7   VRDeck            8693 non-null   float64
 8   Transported       8693 non-null   bool   
 9   HomePlanet_Earth  8693 non-null   bool   
 10  HomePlanet_Mars   8693 non-null   bool   
dtypes: bool(5), float64(6)
memory usage: 450.1 KB


## PyCaret

In [19]:
from pycaret.classification import *
model = setup(data= data_train, target='Transported', session_id=8888, use_gpu=True)

[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: Quadro T1000, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: Quadro T1000, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number o

Unnamed: 0,Description,Value
0,Session id,8888
1,Target,Transported
2,Target type,Binary
3,Original data shape,"(8693, 11)"
4,Transformed data shape,"(8693, 11)"
5,Transformed train set shape,"(6085, 11)"
6,Transformed test set shape,"(2608, 11)"
7,Numeric features,6
8,Preprocess,True
9,Imputation type,simple


[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: Quadro T1000, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: Quadro T1000, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


In [20]:
compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.79,0.8432,0.8094,0.7814,0.7951,0.5798,0.5803,0.257
gbc,Gradient Boosting Classifier,0.7896,0.8559,0.8408,0.7651,0.8011,0.579,0.582,0.418
lightgbm,Light Gradient Boosting Machine,0.7893,0.8536,0.8261,0.7717,0.7979,0.5784,0.58,0.26
et,Extra Trees Classifier,0.7872,0.8396,0.7997,0.7826,0.791,0.5743,0.5745,0.21
ada,Ada Boost Classifier,0.7854,0.8506,0.8287,0.7651,0.7955,0.5704,0.5727,0.16
lr,Logistic Regression,0.7829,0.8459,0.7984,0.777,0.7874,0.5657,0.5662,0.171
knn,K Neighbors Classifier,0.773,0.8146,0.8091,0.7571,0.7822,0.5458,0.5473,0.104
svm,SVM - Linear Kernel,0.7643,0.7666,0.8326,0.7362,0.78,0.5282,0.5356,0.033
ridge,Ridge Classifier,0.7573,0.8341,0.6577,0.825,0.7317,0.5152,0.5264,0.023
lda,Linear Discriminant Analysis,0.7573,0.834,0.6577,0.825,0.7317,0.5152,0.5264,0.037


## Gradient Boosting

In [21]:
gbc = create_model('gbc', fold=5)
print(gbc)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8053,0.8708,0.8532,0.7806,0.8153,0.6102,0.6129
1,0.774,0.8467,0.8075,0.7592,0.7826,0.5478,0.549
2,0.7921,0.8652,0.8499,0.7639,0.8046,0.5839,0.5876
3,0.8028,0.8625,0.8597,0.7739,0.8145,0.6052,0.6091
4,0.7625,0.8364,0.814,0.7404,0.7754,0.5247,0.5273
Mean,0.7873,0.8563,0.8369,0.7636,0.7985,0.5744,0.5772
Std,0.0166,0.0128,0.0216,0.0138,0.0165,0.0332,0.0337


GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='log_loss', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_samples_leaf=1,
                           min_samples_split=2, min_weight_fraction_leaf=0.0,
                           n_estimators=100, n_iter_no_change=None,
                           random_state=8888, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)


### Tune Model

In [22]:
tuned_gbc = tune_model(gbc)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7947,0.8613,0.8404,0.7725,0.805,0.5892,0.5915
1,0.8062,0.8809,0.8567,0.7804,0.8168,0.6121,0.6151
2,0.7915,0.8624,0.8208,0.7778,0.7987,0.5827,0.5836
3,0.7833,0.8505,0.8176,0.7676,0.7918,0.5662,0.5675
4,0.8062,0.8669,0.8567,0.7804,0.8168,0.6121,0.6151
5,0.7895,0.8595,0.8301,0.7697,0.7987,0.5787,0.5805
6,0.8141,0.8592,0.8431,0.7988,0.8203,0.6281,0.6291
7,0.7911,0.8588,0.8366,0.7688,0.8013,0.582,0.5843
8,0.7928,0.8439,0.8399,0.7695,0.8031,0.5853,0.5878
9,0.7664,0.8366,0.8007,0.7515,0.7753,0.5327,0.5338


Fitting 10 folds for each of 10 candidates, totalling 100 fits


### Evaluate

In [23]:
evaluate_model(tuned_gbc)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [24]:
## Random Forest

In [25]:
rf = create_model('rf', fold=5)
print(rf)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8036,0.8542,0.8173,0.7978,0.8074,0.6071,0.6073
1,0.7847,0.8417,0.8026,0.7773,0.7897,0.5693,0.5696
2,0.7987,0.8469,0.8173,0.7902,0.8035,0.5972,0.5976
3,0.797,0.8432,0.8173,0.7877,0.8022,0.5939,0.5944
4,0.7732,0.8254,0.7928,0.7654,0.7788,0.5463,0.5466
Mean,0.7915,0.8423,0.8095,0.7837,0.7964,0.5828,0.5831
Std,0.011,0.0095,0.0101,0.0113,0.0106,0.0221,0.0221


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_samples_leaf=1,
                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                       monotonic_cst=None, n_estimators=100, n_jobs=-1,
                       oob_score=False, random_state=8888, verbose=0,
                       warm_start=False)


### Tune Model

In [26]:
tuned_rf = tune_model(rf)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.798,0.8625,0.8208,0.7875,0.8038,0.5959,0.5964
1,0.8095,0.8838,0.8502,0.7885,0.8182,0.6188,0.6207
2,0.7931,0.8623,0.8111,0.7855,0.7981,0.5861,0.5864
3,0.7833,0.8556,0.8046,0.7743,0.7891,0.5663,0.5668
4,0.8079,0.8654,0.8502,0.7861,0.8169,0.6155,0.6176
5,0.7911,0.8591,0.8137,0.7806,0.7968,0.5821,0.5826
6,0.8109,0.8602,0.8301,0.8013,0.8154,0.6216,0.622
7,0.7911,0.8566,0.8301,0.772,0.8,0.582,0.5837
8,0.7928,0.8435,0.8268,0.7761,0.8006,0.5853,0.5866
9,0.7582,0.8348,0.781,0.7492,0.7648,0.5163,0.5168


Fitting 10 folds for each of 10 candidates, totalling 100 fits


### Evaluate Model

In [27]:
evaluate_model(tuned_rf)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

## Predict

In [28]:
predict_model(tuned_gbc)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.7979,0.8536,0.8286,0.7827,0.805,0.5957,0.5967


Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_Earth,HomePlanet_Mars,Transported,prediction_label,prediction_score
859,False,33.00,False,13.00,0.00,984.00,0.00,0.00,True,False,False,1,0.52
3677,False,29.00,False,3478.00,11153.00,15.00,203.00,15.00,True,False,True,1,0.52
1049,False,36.00,False,0.00,3011.00,0.00,0.00,98.00,True,False,True,1,0.55
707,False,41.00,False,3594.00,3425.00,0.00,0.00,58.00,True,False,False,1,0.50
3899,True,39.00,False,0.00,0.00,0.00,0.00,0.00,False,True,True,1,0.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3370,False,29.00,False,0.00,0.00,0.00,0.00,0.00,True,False,False,1,0.51
5767,False,32.00,False,0.00,0.00,0.00,4588.00,0.00,True,False,False,0,0.56
6661,False,35.00,False,0.00,5552.00,6.00,8.00,226.00,True,False,True,1,0.55
5503,True,51.00,False,0.00,0.00,0.00,0.00,0.00,True,False,True,1,0.55


In [29]:
predict_model(tuned_rf)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.7941,0.8522,0.8149,0.7845,0.7994,0.5881,0.5885


Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_Earth,HomePlanet_Mars,Transported,prediction_label,prediction_score
859,False,33.00,False,13.00,0.00,984.00,0.00,0.00,True,False,False,1,0.59
3677,False,29.00,False,3478.00,11153.00,15.00,203.00,15.00,True,False,True,1,0.64
1049,False,36.00,False,0.00,3011.00,0.00,0.00,98.00,True,False,True,1,0.83
707,False,41.00,False,3594.00,3425.00,0.00,0.00,58.00,True,False,False,1,0.51
3899,True,39.00,False,0.00,0.00,0.00,0.00,0.00,False,True,True,1,0.89
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3370,False,29.00,False,0.00,0.00,0.00,0.00,0.00,True,False,False,1,0.55
5767,False,32.00,False,0.00,0.00,0.00,4588.00,0.00,True,False,False,0,0.92
6661,False,35.00,False,0.00,5552.00,6.00,8.00,226.00,True,False,True,1,0.88
5503,True,51.00,False,0.00,0.00,0.00,0.00,0.00,True,False,True,1,0.76


# Final Model

In [31]:
final_rf = finalize_model(tuned_rf)
print(final_rf)

Pipeline(memory=Memory(location=None),
         steps=[('numerical_imputer',
                 TransformerWrapper(exclude=None,
                                    include=['Age', 'RoomService', 'FoodCourt',
                                             'ShoppingMall', 'Spa', 'VRDeck'],
                                    transformer=SimpleImputer(add_indicator=False,
                                                              copy=True,
                                                              fill_value=None,
                                                              keep_empty_features=False,
                                                              missing_values=nan,
                                                              strategy='mean'))),
                ('categorical_imputer',
                 TransformerWrapper(ex...
                 RandomForestClassifier(bootstrap=False, ccp_alpha=0.0,
                                        class_weight='balanced_subsample

In [32]:
predict_model(final_rf)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.8286,0.892,0.8614,0.8102,0.835,0.657,0.6584


Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_Earth,HomePlanet_Mars,Transported,prediction_label,prediction_score
859,False,33.00,False,13.00,0.00,984.00,0.00,0.00,True,False,False,1,0.58
3677,False,29.00,False,3478.00,11153.00,15.00,203.00,15.00,True,False,True,1,0.76
1049,False,36.00,False,0.00,3011.00,0.00,0.00,98.00,True,False,True,1,0.85
707,False,41.00,False,3594.00,3425.00,0.00,0.00,58.00,True,False,False,0,0.71
3899,True,39.00,False,0.00,0.00,0.00,0.00,0.00,False,True,True,1,0.87
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3370,False,29.00,False,0.00,0.00,0.00,0.00,0.00,True,False,False,1,0.56
5767,False,32.00,False,0.00,0.00,0.00,4588.00,0.00,True,False,False,0,0.92
6661,False,35.00,False,0.00,5552.00,6.00,8.00,226.00,True,False,True,1,0.90
5503,True,51.00,False,0.00,0.00,0.00,0.00,0.00,True,False,True,1,0.78


## Applied on Test

In [33]:
test = pd.read_csv('test.csv')
test._get_numeric_data().fillna(0, inplace=True)
test.VIP.fillna(False, inplace=True)
test.CryoSleep.fillna(False,inplace=True)
test.HomePlanet = np.where(test.HomePlanet=="Europa","Earth",test.HomePlanet)
test = pd.get_dummies(test, columns=['HomePlanet'])
test.isna().sum()

PassengerId           0
CryoSleep             0
Cabin               100
Destination          92
Age                   0
VIP                   0
RoomService           0
FoodCourt             0
ShoppingMall          0
Spa                   0
VRDeck                0
Name                 94
HomePlanet_Earth      0
HomePlanet_Mars       0
dtype: int64

In [34]:
submit = predict_model(final_rf, data=test)
submit['Transported'] = np.where(submit.prediction_label==1,True,False)
submit.head()

Unnamed: 0,PassengerId,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,HomePlanet_Earth,HomePlanet_Mars,prediction_label,prediction_score,Transported
0,0013_01,True,G/3/S,TRAPPIST-1e,27.0,False,0.0,0.0,0.0,0.0,0.0,Nelly Carsoning,True,False,1,0.84,True
1,0018_01,False,F/4/S,TRAPPIST-1e,19.0,False,0.0,9.0,0.0,2823.0,0.0,Lerome Peckers,True,False,0,0.9,False
2,0019_01,True,C/0/S,55 Cancri e,31.0,False,0.0,0.0,0.0,0.0,0.0,Sabih Unhearfus,True,False,1,0.84,True
3,0021_01,False,C/1/S,TRAPPIST-1e,38.0,False,0.0,6652.0,0.0,181.0,585.0,Meratz Caltilter,True,False,1,0.79,True
4,0023_01,False,F/5/S,TRAPPIST-1e,20.0,False,10.0,0.0,635.0,0.0,0.0,Brence Harperez,True,False,0,0.5,False


In [35]:
submit_file = submit[['PassengerId','Transported']]
submit_file.head()

Unnamed: 0,PassengerId,Transported
0,0013_01,True
1,0018_01,False
2,0019_01,True
3,0021_01,True
4,0023_01,False


In [36]:
submit_file.to_csv('submit_3.csv', index=False)

In [37]:
submit['Transported'] = np.where(submit.prediction_score>=0.55,True,False)
submit.head()

Unnamed: 0,PassengerId,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,HomePlanet_Earth,HomePlanet_Mars,prediction_label,prediction_score,Transported
0,0013_01,True,G/3/S,TRAPPIST-1e,27.0,False,0.0,0.0,0.0,0.0,0.0,Nelly Carsoning,True,False,1,0.84,True
1,0018_01,False,F/4/S,TRAPPIST-1e,19.0,False,0.0,9.0,0.0,2823.0,0.0,Lerome Peckers,True,False,0,0.9,True
2,0019_01,True,C/0/S,55 Cancri e,31.0,False,0.0,0.0,0.0,0.0,0.0,Sabih Unhearfus,True,False,1,0.84,True
3,0021_01,False,C/1/S,TRAPPIST-1e,38.0,False,0.0,6652.0,0.0,181.0,585.0,Meratz Caltilter,True,False,1,0.79,True
4,0023_01,False,F/5/S,TRAPPIST-1e,20.0,False,10.0,0.0,635.0,0.0,0.0,Brence Harperez,True,False,0,0.5,False


In [38]:
submit_file = submit[['PassengerId','Transported']]
submit_file.to_csv('submit_4.csv', index=False)