# 1.Import libraries

In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import  DecisionTreeClassifier
import warnings 
warnings.filterwarnings('ignore')
%matplotlib inline

# 2.Import Data set

In [2]:
fraud=pd.read_csv('Fraud_check.csv')
fraud

Unnamed: 0,Undergrad,Marital.Status,Taxable.Income,City.Population,Work.Experience,Urban
0,NO,Single,68833,50047,10,YES
1,YES,Divorced,33700,134075,18,YES
2,NO,Married,36925,160205,30,YES
3,YES,Single,50190,193264,15,YES
4,NO,Married,81002,27533,28,NO
...,...,...,...,...,...,...
595,YES,Divorced,76340,39492,7,YES
596,YES,Divorced,69967,55369,2,YES
597,NO,Divorced,47334,154058,0,YES
598,YES,Married,98592,180083,17,NO


# 3.Data Understanding

In [3]:
fraud.shape

(600, 6)

In [4]:
fraud.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Undergrad        600 non-null    object
 1   Marital.Status   600 non-null    object
 2   Taxable.Income   600 non-null    int64 
 3   City.Population  600 non-null    int64 
 4   Work.Experience  600 non-null    int64 
 5   Urban            600 non-null    object
dtypes: int64(3), object(3)
memory usage: 28.2+ KB


In [5]:
fraud.isna().sum()

Undergrad          0
Marital.Status     0
Taxable.Income     0
City.Population    0
Work.Experience    0
Urban              0
dtype: int64

In [6]:
fraud.describe(include='all')

Unnamed: 0,Undergrad,Marital.Status,Taxable.Income,City.Population,Work.Experience,Urban
count,600,600,600.0,600.0,600.0,600
unique,2,3,,,,2
top,YES,Single,,,,YES
freq,312,217,,,,302
mean,,,55208.375,108747.368333,15.558333,
std,,,26204.827597,49850.075134,8.842147,
min,,,10003.0,25779.0,0.0,
25%,,,32871.5,66966.75,8.0,
50%,,,55074.5,106493.5,15.0,
75%,,,78611.75,150114.25,24.0,


In [7]:
fraud.dtypes

Undergrad          object
Marital.Status     object
Taxable.Income      int64
City.Population     int64
Work.Experience     int64
Urban              object
dtype: object

In [8]:
fraud['Marital.Status'].value_counts

<bound method IndexOpsMixin.value_counts of 0        Single
1      Divorced
2       Married
3        Single
4       Married
         ...   
595    Divorced
596    Divorced
597    Divorced
598     Married
599    Divorced
Name: Marital.Status, Length: 600, dtype: object>

# 4.Data Preprocessing

In [9]:
fraud.rename(columns = {'Marital.Status':'Marital_Status', 'Taxable.Income':'Taxable_Income',
                        'City.Population':'City_Population','Work.Experience':'Work_Experience'}, inplace = True)
fraud

Unnamed: 0,Undergrad,Marital_Status,Taxable_Income,City_Population,Work_Experience,Urban
0,NO,Single,68833,50047,10,YES
1,YES,Divorced,33700,134075,18,YES
2,NO,Married,36925,160205,30,YES
3,YES,Single,50190,193264,15,YES
4,NO,Married,81002,27533,28,NO
...,...,...,...,...,...,...
595,YES,Divorced,76340,39492,7,YES
596,YES,Divorced,69967,55369,2,YES
597,NO,Divorced,47334,154058,0,YES
598,YES,Married,98592,180083,17,NO


In [10]:
le=LabelEncoder()

In [11]:
fraud['Undergrad']=le.fit_transform(fraud.Undergrad)
fraud['Marital_Status']=le.fit_transform(fraud.Marital_Status)
fraud['Urban']=le.fit_transform(fraud.Urban)

In [12]:
fraud.dtypes

Undergrad          int32
Marital_Status     int32
Taxable_Income     int64
City_Population    int64
Work_Experience    int64
Urban              int32
dtype: object

In [13]:
fraud["Tax"] = pd.cut(fraud["Taxable_Income"], bins = [10000,30000,100000], labels = ["Risky", "Good"])

In [14]:
fraud['Tax']=le.fit_transform(fraud.Tax)

In [32]:
fraud['Tax'].value_counts()

0    476
1    124
Name: Tax, dtype: int64

# 5.Model Building

In [15]:
X=fraud.drop('Tax',axis=1)
y=fraud.Tax

In [16]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.15,random_state=19)

In [17]:
classifier=DecisionTreeClassifier()

# 6.Model Training

## Train data

## plotting decision

In [18]:
from sklearn.tree import plot_tree

In [19]:
#plot_tree(classifier)
#plt.show()

In [20]:
classifier=classifier.fit(X_train,y_train)

# 7.Model Testing

## Train data

In [21]:
y_pred_train=classifier.predict(X_train)

## Test data

In [22]:
y_pred_test=classifier.predict(X_test)

# 8.Modal Evaluation

In [23]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
print('Accuracy Score : ',accuracy_score(y_train,y_pred_train))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_train,y_pred_train))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_train,y_pred_train))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[405   0]
 [  0 105]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       405
           1       1.00      1.00      1.00       105

    accuracy                           1.00       510
   macro avg       1.00      1.00      1.00       510
weighted avg       1.00      1.00      1.00       510



In [24]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
print('Accuracy Score : ',accuracy_score(y_test,y_pred_test))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_test,y_pred_test))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_test,y_pred_test))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[71  0]
 [ 0 19]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        71
           1       1.00      1.00      1.00        19

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90



# Building Random Forest

In [25]:
from sklearn.ensemble import RandomForestClassifier

In [26]:
rd_classifier=RandomForestClassifier()

In [27]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt','log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 1000,10)]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10,14]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4,6,8]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
              'criterion':['entropy','gini']}

print(random_grid)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [2, 5, 10, 14], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['entropy', 'gini']}


In [28]:
from sklearn.model_selection import RandomizedSearchCV
random_search_cv=RandomizedSearchCV(estimator=rd_classifier, param_distributions=random_grid,
                                    random_state=23,n_iter=100,n_jobs=-1,cv=3,verbose=2)
random_final=random_search_cv.fit(X_train,y_train)


Fitting 3 folds for each of 100 candidates, totalling 300 fits


In [29]:
param_grid = {
    'criterion': [random_final.best_params_['criterion']],
    'max_depth': [random_final.best_params_['max_depth']],
    'max_features': [random_final.best_params_['max_features']],
    'min_samples_leaf': [random_final.best_params_['min_samples_leaf'], 
                         random_final.best_params_['min_samples_leaf']+2, 
                         random_final.best_params_['min_samples_leaf'] + 4],
    'min_samples_split': [random_final.best_params_['min_samples_split'] - 2,
                          random_final.best_params_['min_samples_split'] - 1,
                          random_final.best_params_['min_samples_split'], 
                          random_final.best_params_['min_samples_split'] +1,
                          random_final.best_params_['min_samples_split'] + 2],
    'n_estimators': [random_final.best_params_['n_estimators'] - 200, random_final.best_params_['n_estimators'] - 100, 
                     random_final.best_params_['n_estimators'], 
                     random_final.best_params_['n_estimators'] + 100, random_final.best_params_['n_estimators'] + 200]
}

print(param_grid)

{'criterion': ['gini'], 'max_depth': [450], 'max_features': ['sqrt'], 'min_samples_leaf': [8, 10, 12], 'min_samples_split': [8, 9, 10, 11, 12], 'n_estimators': [1400, 1500, 1600, 1700, 1800]}


In [30]:
from sklearn.model_selection import GridSearchCV
grid_cv=GridSearchCV(estimator=rd_classifier,param_grid=param_grid,n_jobs=-1,verbose=2)
grid_cv.fit(X_train,y_train)

Fitting 5 folds for each of 75 candidates, totalling 375 fits


GridSearchCV(estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'criterion': ['gini'], 'max_depth': [450],
                         'max_features': ['sqrt'],
                         'min_samples_leaf': [8, 10, 12],
                         'min_samples_split': [8, 9, 10, 11, 12],
                         'n_estimators': [1400, 1500, 1600, 1700, 1800]},
             verbose=2)

In [31]:
grid_cv.best_params_

{'criterion': 'gini',
 'max_depth': 450,
 'max_features': 'sqrt',
 'min_samples_leaf': 8,
 'min_samples_split': 8,
 'n_estimators': 1400}

In [33]:
rd_classifier=RandomForestClassifier(n_estimators=1400,criterion='gini',max_depth=450,max_features='sqrt',min_samples_leaf=8,
                                    min_samples_split=8,class_weight={0:3,1:1})

# Model Training

In [34]:
rd_classifier=rd_classifier.fit(X_train,y_train)

# Model Testing  and Model Evaluation

#### Train data

In [35]:
y_pred_train=rd_classifier.predict(X_train)

In [36]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
print('Accuracy Score : ',accuracy_score(y_train,y_pred_train))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_train,y_pred_train))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_train,y_pred_train))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[405   0]
 [  0 105]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       405
           1       1.00      1.00      1.00       105

    accuracy                           1.00       510
   macro avg       1.00      1.00      1.00       510
weighted avg       1.00      1.00      1.00       510



#### Test data

In [37]:
y_pred_test=rd_classifier.predict(X_test)

In [38]:
print('Accuracy Score : ',accuracy_score(y_test,y_pred_test))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_test,y_pred_test))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_test,y_pred_test))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[71  0]
 [ 0 19]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        71
           1       1.00      1.00      1.00        19

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90



# Building Gradient Boosting

In [41]:
from sklearn.ensemble import GradientBoostingClassifier

In [42]:
gd_classifier=GradientBoostingClassifier()

In [43]:
#loss function
loss = ['deviance','exponential']
#learning Rate
learning_rate = [float(x) for x in np.linspace(start=0,stop=1,num=10)]
#criterion
criterion= ['friedman_mse', 'squared_error', 'mse', 'mae' ]
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt','log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 1000,10)]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10,14]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4,6,8]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
              'criterion':criterion,
              'loss':loss,'learning_rate':learning_rate}

print(random_grid)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [2, 5, 10, 14], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['friedman_mse', 'squared_error', 'mse', 'mae'], 'loss': ['deviance', 'exponential'], 'learning_rate': [0.0, 0.1111111111111111, 0.2222222222222222, 0.3333333333333333, 0.4444444444444444, 0.5555555555555556, 0.6666666666666666, 0.7777777777777777, 0.8888888888888888, 1.0]}


In [44]:
random_cv=RandomizedSearchCV(estimator=gd_classifier,param_distributions=random_grid,n_iter=10,cv=3,n_jobs=-1,verbose=2,
                             random_state=99)
random_cv.fit(X_train,y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits


RandomizedSearchCV(cv=3, estimator=GradientBoostingClassifier(), n_jobs=-1,
                   param_distributions={'criterion': ['friedman_mse',
                                                      'squared_error', 'mse',
                                                      'mae'],
                                        'learning_rate': [0.0,
                                                          0.1111111111111111,
                                                          0.2222222222222222,
                                                          0.3333333333333333,
                                                          0.4444444444444444,
                                                          0.5555555555555556,
                                                          0.6666666666666666,
                                                          0.7777777777777777,
                                                          0.8888888888888888,
                           

In [45]:

param_grid = {
    'criterion': [random_cv.best_params_['criterion']],
    'max_depth': [random_cv.best_params_['max_depth']],
    'max_features': [random_cv.best_params_['max_features']],
    'min_samples_leaf': [random_cv.best_params_['min_samples_leaf'], 
                         random_cv.best_params_['min_samples_leaf']+2, 
                         random_cv.best_params_['min_samples_leaf'] + 4],
    'min_samples_split': [random_cv.best_params_['min_samples_split'] - 2,
                          random_cv.best_params_['min_samples_split'] - 1,
                          random_cv.best_params_['min_samples_split'], 
                          random_cv.best_params_['min_samples_split'] +1,
                          random_cv.best_params_['min_samples_split'] + 2],
    'n_estimators': [random_cv.best_params_['n_estimators'] - 200, random_cv.best_params_['n_estimators'] - 100, 
                     random_cv.best_params_['n_estimators'], 
                     random_cv.best_params_['n_estimators'] + 100, random_cv.best_params_['n_estimators'] + 200],
    'loss':[random_cv.best_params_['loss']],
    'learning_rate':[random_cv.best_params_['learning_rate']]
}

print(param_grid)

{'criterion': ['squared_error'], 'max_depth': [780], 'max_features': ['auto'], 'min_samples_leaf': [8, 10, 12], 'min_samples_split': [8, 9, 10, 11, 12], 'n_estimators': [1000, 1100, 1200, 1300, 1400], 'loss': ['deviance'], 'learning_rate': [0.3333333333333333]}


In [46]:
grid_cv=GridSearchCV(estimator=gd_classifier,param_grid=param_grid,cv=3,n_jobs=-1,verbose=2)
grid_cv.fit(X_train,y_train)

Fitting 3 folds for each of 75 candidates, totalling 225 fits


GridSearchCV(cv=3, estimator=GradientBoostingClassifier(), n_jobs=-1,
             param_grid={'criterion': ['squared_error'],
                         'learning_rate': [0.3333333333333333],
                         'loss': ['deviance'], 'max_depth': [780],
                         'max_features': ['auto'],
                         'min_samples_leaf': [8, 10, 12],
                         'min_samples_split': [8, 9, 10, 11, 12],
                         'n_estimators': [1000, 1100, 1200, 1300, 1400]},
             verbose=2)

In [47]:
grid_cv.best_params_

{'criterion': 'squared_error',
 'learning_rate': 0.3333333333333333,
 'loss': 'deviance',
 'max_depth': 780,
 'max_features': 'auto',
 'min_samples_leaf': 8,
 'min_samples_split': 8,
 'n_estimators': 1000}

In [48]:
gd_classifier=GradientBoostingClassifier(criterion= 'squared_error',learning_rate=0.3333333333333333,loss= 'deviance',
                                        max_depth = 780,max_features = 'auto',min_samples_leaf = 8,min_samples_split = 8,
                                       n_estimators = 1000)

# Model Training

In [49]:
gd_classifier.fit(X_train,y_train)

GradientBoostingClassifier(criterion='squared_error',
                           learning_rate=0.3333333333333333, max_depth=780,
                           max_features='auto', min_samples_leaf=8,
                           min_samples_split=8, n_estimators=1000)

# Model Testing | Model Evaluation

#### Train data

In [52]:
y_pred_train=gd_classifier.predict(X_train)

In [53]:
print('Accuracy Score : ',accuracy_score(y_train,y_pred_train))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_train,y_pred_train))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_train,y_pred_train))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[405   0]
 [  0 105]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       405
           1       1.00      1.00      1.00       105

    accuracy                           1.00       510
   macro avg       1.00      1.00      1.00       510
weighted avg       1.00      1.00      1.00       510



In [50]:
y_pred_test=gd_classifier.predict(X_test)

In [51]:
print('Accuracy Score : ',accuracy_score(y_test,y_pred_test))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_test,y_pred_test))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_test,y_pred_test))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[71  0]
 [ 0 19]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        71
           1       1.00      1.00      1.00        19

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90



# Building Ada Boost

In [54]:
from sklearn.ensemble import AdaBoostClassifier

In [55]:
ad_classifier=AdaBoostClassifier()

In [56]:
n_estimators = [int(x) for x in np.linspace(200,1000,10)]
learning_rate= [float(x) for x in np.linspace(0,1,10)]
algorithm=['SAMME', 'SAMME.R']
grid={'n_estimators':n_estimators,'learning_rate':learning_rate,'algorithm':algorithm}
print(grid)

{'n_estimators': [200, 288, 377, 466, 555, 644, 733, 822, 911, 1000], 'learning_rate': [0.0, 0.1111111111111111, 0.2222222222222222, 0.3333333333333333, 0.4444444444444444, 0.5555555555555556, 0.6666666666666666, 0.7777777777777777, 0.8888888888888888, 1.0], 'algorithm': ['SAMME', 'SAMME.R']}


In [57]:
gridsearch=GridSearchCV(estimator=ad_classifier,param_grid=grid,n_jobs=-1,cv=5,verbose=2)
gridsearch.fit(X_train,y_train)

Fitting 5 folds for each of 200 candidates, totalling 1000 fits


GridSearchCV(cv=5, estimator=AdaBoostClassifier(), n_jobs=-1,
             param_grid={'algorithm': ['SAMME', 'SAMME.R'],
                         'learning_rate': [0.0, 0.1111111111111111,
                                           0.2222222222222222,
                                           0.3333333333333333,
                                           0.4444444444444444,
                                           0.5555555555555556,
                                           0.6666666666666666,
                                           0.7777777777777777,
                                           0.8888888888888888, 1.0],
                         'n_estimators': [200, 288, 377, 466, 555, 644, 733,
                                          822, 911, 1000]},
             verbose=2)

In [58]:
gridsearch.best_params_

{'algorithm': 'SAMME',
 'learning_rate': 0.1111111111111111,
 'n_estimators': 200}

# Model Training

In [59]:
ad_classifier=AdaBoostClassifier(algorithm = 'SAMME',learning_rate= 0.1111111111111111,n_estimators=200)

In [60]:
ad_classifier.fit(X_train,y_train)

AdaBoostClassifier(algorithm='SAMME', learning_rate=0.1111111111111111,
                   n_estimators=200)

# Model Testing | Model Evaluation

#### Train data

In [61]:
y_pred_train=ad_classifier.predict(X_train)

In [62]:
print('Accuracy Score : ',accuracy_score(y_train,y_pred_train))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_train,y_pred_train))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_train,y_pred_train))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[405   0]
 [  0 105]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       405
           1       1.00      1.00      1.00       105

    accuracy                           1.00       510
   macro avg       1.00      1.00      1.00       510
weighted avg       1.00      1.00      1.00       510



#### Test data

In [63]:
y_pred_test=ad_classifier.predict(X_test)

In [64]:
print('Accuracy Score : ',accuracy_score(y_test,y_pred_test))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_test,y_pred_test))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_test,y_pred_test))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[71  0]
 [ 0 19]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        71
           1       1.00      1.00      1.00        19

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90



# Building XGBoost

In [65]:
from xgboost import XGBClassifier

In [66]:
xg_classifier=XGBClassifier()

# Model Training

In [67]:
xg_classifier.fit(X_train,y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

# Model Testing | Model Evaluation

#### train data

In [68]:
y_pred_train=xg_classifier.predict(X_train)

In [69]:
print('Accuracy Score : ',accuracy_score(y_train,y_pred_train))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_train,y_pred_train))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_train,y_pred_train))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[405   0]
 [  0 105]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       405
           1       1.00      1.00      1.00       105

    accuracy                           1.00       510
   macro avg       1.00      1.00      1.00       510
weighted avg       1.00      1.00      1.00       510



#### test data

In [70]:
y_pred_test=xg_classifier.predict(X_test)

In [71]:
print('Accuracy Score : ',accuracy_score(y_test,y_pred_test))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_test,y_pred_test))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_test,y_pred_test))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[71  0]
 [ 0 19]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        71
           1       1.00      1.00      1.00        19

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90



# Building lightgradient boosting

In [72]:
from lightgbm import LGBMClassifier

In [73]:
lgbm_classifier=LGBMClassifier()

# Model Training

In [74]:
lgbm_classifier.fit(X_train,y_train)

LGBMClassifier()

# Model Testing | Model Evaluation

#### Train data

In [75]:
y_pred_train=lgbm_classifier.predict(X_train)

In [76]:
print('Accuracy Score : ',accuracy_score(y_train,y_pred_train))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_train,y_pred_train))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_train,y_pred_train))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[405   0]
 [  0 105]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       405
           1       1.00      1.00      1.00       105

    accuracy                           1.00       510
   macro avg       1.00      1.00      1.00       510
weighted avg       1.00      1.00      1.00       510



#### Test data

In [77]:
y_pred_test=lgbm_classifier.predict(X_test)

In [78]:
print('Accuracy Score : ',accuracy_score(y_test,y_pred_test))
print('-------------------------------------------')
print('Confusion Matrix:\n',confusion_matrix(y_test,y_pred_test))
print('-------------------------------------------')
print('Classification Report:\n',classification_report(y_test,y_pred_test))

Accuracy Score :  1.0
-------------------------------------------
Confusion Matrix:
 [[71  0]
 [ 0 19]]
-------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        71
           1       1.00      1.00      1.00        19

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90



# Constructed Different Ensemble Techiques along with Decision Tree ,Random forest and saw it is working with different algorithms