In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
# Read the exoplanet table; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='resources/exoplanet_data.csv')
# Show the first five rows as this cell's output.
df.head(n=5)

In [None]:
def log_reg_model(df):
    """Train a logistic-regression classifier on ``df`` and print its test results.

    Steps:
      1. Drop every row whose ``koi_disposition`` equals ``'CANDIDATE'``.
      2. Use ``koi_disposition`` as the target and all other columns as features.
      3. Split 75/25 into train/test sets (fixed ``random_state`` for repeatability).
      4. Min-max scale the features, fitting the scaler on the training split only
         so no information from the test split leaks into the scaling.
      5. Fit ``LogisticRegression`` and predict on the scaled test split.
      6. Print the confusion matrix, then the accuracy score.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``koi_disposition`` column. Its position does not matter;
        the column is selected by name. Every other column is passed to the
        scaler, so they are assumed to be numeric (not checked here).

    Returns
    -------
    None
        Results are printed, not returned.

    Raises
    ------
    KeyError
        If ``df`` has no ``koi_disposition`` column.
    """
    target = 'koi_disposition'

    #Remove the Candidate rows
    dataset = df[df[target] != 'CANDIDATE']

    #Create X and y -- the label is picked by name so column order cannot
    #silently put the wrong column into y (or the label into X)
    X = dataset.drop(columns=[target]).values
    y = dataset[target].values

    #Create Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=101)

    #Min-max scale the data (fit on the training split only, then reuse on test)
    mms = MinMaxScaler()
    X_train_mms = mms.fit_transform(X_train)
    X_test_mms = mms.transform(X_test)

    #Using logistic regression, fit the model then make predictions from the test set
    log_cla = LogisticRegression(random_state = 0)
    log_cla.fit(X_train_mms, y_train)
    log_pred = log_cla.predict(X_test_mms)

    #print the results: confusion matrix (true labels first, predictions second), then accuracy
    cm = confusion_matrix(y_test, log_pred)
    print(cm)
    score = accuracy_score(y_test, log_pred)
    print(score)

In [None]:
# Baseline subset: the measurement columns listed below, without any of the
# koi_fpflag_* columns. koi_disposition is the label column used by log_reg_model.
feature_select = df.loc[:, [
    'koi_period', 'koi_period_err1', 'koi_period_err2',
    'koi_impact', 'koi_impact_err1', 'koi_impact_err2',
    'koi_duration', 'koi_duration_err1', 'koi_duration_err2',
    'koi_depth', 'koi_depth_err1', 'koi_depth_err2',
    'koi_prad', 'koi_prad_err1', 'koi_prad_err2',
    'koi_teq', 'koi_model_snr',
    'koi_steff', 'koi_steff_err1', 'koi_steff_err2',
    'koi_slogg', 'koi_slogg_err1', 'koi_slogg_err2',
    'koi_srad', 'koi_srad_err1', 'koi_srad_err2',
    'ra', 'dec', 'koi_kepmag',
    'koi_disposition',
]]

In [None]:
# Evaluate the model on the baseline subset (no koi_fpflag_* columns).
log_reg_model(df=feature_select)

In [None]:
# Full subset: the four koi_fpflag_* columns followed by the same measurement
# columns as feature_select. koi_disposition is the label column used by log_reg_model.
feature_select_full = df.loc[:, [
    'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
    'koi_fpflag_ec',
    'koi_period', 'koi_period_err1', 'koi_period_err2',
    'koi_impact', 'koi_impact_err1', 'koi_impact_err2',
    'koi_duration', 'koi_duration_err1', 'koi_duration_err2',
    'koi_depth', 'koi_depth_err1', 'koi_depth_err2',
    'koi_prad', 'koi_prad_err1', 'koi_prad_err2',
    'koi_teq', 'koi_model_snr',
    'koi_steff', 'koi_steff_err1', 'koi_steff_err2',
    'koi_slogg', 'koi_slogg_err1', 'koi_slogg_err2',
    'koi_srad', 'koi_srad_err1', 'koi_srad_err2',
    'ra', 'dec', 'koi_kepmag',
    'koi_disposition',
]]

In [None]:
# Evaluate the model on the full subset (koi_fpflag_* columns plus measurements).
log_reg_model(df=feature_select_full)

In [None]:
# Only the four koi_fpflag_* columns plus the koi_disposition label, to see how
# much of the result these columns account for on their own.
feature_select_trouble = df.loc[:, [
    'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
    'koi_fpflag_ec',
    'koi_disposition',
]]

In [None]:
# Evaluate the model using only the four koi_fpflag_* columns as features.
log_reg_model(df=feature_select_trouble)

In [None]:
# One frame per koi_fpflag_* column: a single feature plus the koi_disposition label.
feature_select_nt = df.loc[:, ['koi_fpflag_nt', 'koi_disposition']]
feature_select_ss = df.loc[:, ['koi_fpflag_ss', 'koi_disposition']]
feature_select_co = df.loc[:, ['koi_fpflag_co', 'koi_disposition']]
feature_select_ec = df.loc[:, ['koi_fpflag_ec', 'koi_disposition']]

In [None]:
# Single-feature run: koi_fpflag_nt only.
log_reg_model(df=feature_select_nt)

In [None]:
# Single-feature run: koi_fpflag_ss only.
log_reg_model(df=feature_select_ss)

In [None]:
# Single-feature run: koi_fpflag_co only.
log_reg_model(df=feature_select_co)

In [None]:
# Single-feature run: koi_fpflag_ec only.
log_reg_model(df=feature_select_ec)

In [None]:
# Two-feature frames pairing koi_fpflag_nt with each of the other three flag columns.
feature_select_nt_ss = df.loc[:, ['koi_fpflag_nt', 'koi_fpflag_ss', 'koi_disposition']]
feature_select_nt_co = df.loc[:, ['koi_fpflag_nt', 'koi_fpflag_co', 'koi_disposition']]
feature_select_nt_ec = df.loc[:, ['koi_fpflag_nt', 'koi_fpflag_ec', 'koi_disposition']]

In [None]:
# Two-feature run: koi_fpflag_nt + koi_fpflag_ss.
log_reg_model(df=feature_select_nt_ss)

In [None]:
# Two-feature run: koi_fpflag_nt + koi_fpflag_co.
log_reg_model(df=feature_select_nt_co)

In [None]:
# Two-feature run: koi_fpflag_nt + koi_fpflag_ec.
log_reg_model(df=feature_select_nt_ec)

In [None]:
# Two-feature frames pairing koi_fpflag_ss with the remaining flag columns
# (the nt/ss pair is already covered by feature_select_nt_ss).
feature_select_ss_co = df.loc[:, ['koi_fpflag_ss', 'koi_fpflag_co', 'koi_disposition']]
feature_select_ss_ec = df.loc[:, ['koi_fpflag_ss', 'koi_fpflag_ec', 'koi_disposition']]

In [None]:
# Two-feature run: koi_fpflag_ss + koi_fpflag_co.
log_reg_model(df=feature_select_ss_co)

In [None]:
# Two-feature run: koi_fpflag_ss + koi_fpflag_ec.
log_reg_model(df=feature_select_ss_ec)

In [None]:
# Last remaining flag pair: koi_fpflag_co with koi_fpflag_ec, plus the label.
feature_select_co_ec = df.loc[:, ['koi_fpflag_co', 'koi_fpflag_ec', 'koi_disposition']]

In [None]:
# Two-feature run: koi_fpflag_co + koi_fpflag_ec.
log_reg_model(df=feature_select_co_ec)