# Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import Imputer,LabelEncoder,StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis,QuadraticDiscriminantAnalysis
from sklearn.decomposition import PCA
from sklearn.neighbors import LocalOutlierFactor
from sklearn.model_selection import KFold
from sklearn.svm import NuSVC
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from imblearn.over_sampling import SMOTE, ADASYN
from sklearn.metrics import roc_curve
from sklearn.externals import joblib


%matplotlib inline 

# Load the Test Dataframe

In [2]:
# Read the test-set CSV into a DataFrame (the path is relative to the working directory).
test_dataframe = pd.read_csv("./data/TestingSet.csv")

In [3]:
# `DataFrame.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0;
# `.values` is its documented replacement and yields the same NumPy array.
test_data_matrix = test_dataframe.values

# Keep every column except the first as the model input.
# NOTE(review): the first column is presumably a row identifier - confirm against the training notebook.
X_test = test_data_matrix[:, 1:]


# Load the Transformers and the Model

In [4]:
# Persisted pipeline artifacts, listed in the same order as the names they are
# unpacked into below.
# NOTE: joblib.load unpickles its input, so only load files from a trusted source.
_artifact_paths = (
    './data/model/imputer.pkl',
    './data/model/encoder.pkl',
    './data/model/scaler.pkl',
    './data/model/pca.pkl',
    './data/model/sfm.pkl',
    './data/model/rf.pkl',
)
imputer, encoder, scaler, pca, sfm, clf_RF = [
    joblib.load(artifact_path) for artifact_path in _artifact_paths
]

# Apply all the Transformers one by one

In [5]:
# Feed the raw test matrix through each loaded transformer in turn:
# imputer -> scaler -> pca -> sfm. The cell always restarts from X_test,
# so re-running it gives the same X.
X = X_test
for step in (imputer, scaler, pca, sfm):
    X = step.transform(X)

# Apply the model on the data with Threshold = 0.561261261261

In [6]:
# Cut-off applied to the second predict_proba column.
# NOTE(review): presumably tuned during training - confirm where this value comes from.
THRESHOLD = 0.561261261261

# Rows whose probability in column 1 exceeds the threshold become 1, all others 0.
# `np.int` was only an alias of the builtin `int` (deprecated in NumPy 1.20,
# removed in 1.24), so `astype(int)` produces the same dtype on every version.
prediction = (clf_RF.predict_proba(X)[:, 1] > THRESHOLD).astype(int)

In [7]:
# Display the 0/1 array obtained by thresholding the classifier's probabilities.
prediction

array([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1,

# Apply the Encoder to get the real labels back

In [8]:
# Map the integer predictions back to the labels known to the loaded encoder.
predicted_labels = encoder.inverse_transform(prediction)

In [9]:
# Display the decoded label for each test row.
predicted_labels

array(['Good', 'Good', 'Bad', 'Good', 'Bad', 'Good', 'Bad', 'Good', 'Good',
       'Good', 'Good', 'Good', 'Good', 'Good', 'Good', 'Good', 'Good',
       'Good', 'Good', 'Bad', 'Good', 'Good', 'Good', 'Good', 'Bad',
       'Good', 'Good', 'Bad', 'Bad', 'Good', 'Good', 'Bad', 'Good', 'Good',
       'Good', 'Good', 'Good', 'Bad', 'Good', 'Bad', 'Good', 'Good', 'Bad',
       'Good', 'Good', 'Good', 'Good', 'Bad', 'Good', 'Bad', 'Good',
       'Good', 'Good', 'Bad', 'Good', 'Good', 'Good', 'Good', 'Good',
       'Good', 'Good', 'Bad', 'Bad', 'Good', 'Good', 'Good', 'Good', 'Bad',
       'Good', 'Good', 'Good', 'Good', 'Good', 'Bad', 'Good', 'Bad',
       'Good', 'Good', 'Good', 'Good', 'Bad', 'Good', 'Good', 'Good',
       'Good', 'Good', 'Good', 'Good', 'Bad', 'Good', 'Good', 'Good',
       'Good', 'Bad', 'Good', 'Good', 'Good', 'Good', 'Good', 'Good',
       'Bad', 'Good', 'Good', 'Good', 'Good', 'Bad', 'Good', 'Good',
       'Good', 'Good', 'Good', 'Good', 'Good', 'Good', 'Good', 'Good'

# Prepare the Submission Dataframe and save it

In [10]:
# Build the submission table in a single constructor call.
# 'Sl No.' is a 1-based running row number; 'Machine_State' holds the decoded labels.
# `columns=` pins the column order so it never depends on dict ordering.
submission_dataframe = pd.DataFrame(
    {
        'Sl No.': list(range(1, len(predicted_labels) + 1)),
        'Machine_State': predicted_labels,
    },
    columns=['Sl No.', 'Machine_State'],
)

In [11]:
# Preview the first five rows of the submission table.
submission_dataframe.head(5)

Unnamed: 0,Sl No.,Machine_State
0,1,Good
1,2,Good
2,3,Bad
3,4,Good
4,5,Bad


In [12]:
# Write the submission table to CSV; index=False leaves the DataFrame index out of the file.
submission_dataframe.to_csv('./data/Submission.csv',index=False)