**Importing Packages**

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import  confusion_matrix,accuracy_score,precision_score,recall_score,f1_score

**Reading the dataset**

In [3]:
# Load the dataset from main.csv (path is relative to the notebook's working directory).
data= pd.read_csv("main.csv")
# Bind the loaded table to `df` as well; no explicit copy is requested here.
# NOTE(review): `df` and `data` may therefore share underlying data — confirm
# before mutating one and reading the other.
df = pd.DataFrame(data)
# Last expression of the cell: render the frame as the cell output.
df

Unnamed: 0,ctrust,cuntrust,last,context,score
0,4,1,4,sport,untrustworthy
1,4,1,4,sport,untrustworthy
2,1,4,4,sport,trustworthy
3,2,1,4,sport,trustworthy
4,3,1,4,sport,trustworthy
...,...,...,...,...,...
317,2,1,2,holiday,untrustworthy
318,1,1,1,holiday,trustworthy
319,1,1,1,holiday,trustworthy
320,1,1,1,holiday,trustworthy


In [4]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 322 entries, 0 to 321
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   ctrust    322 non-null    int64 
 1   cuntrust  322 non-null    int64 
 2   last      322 non-null    int64 
 3   context   322 non-null    object
 4   score     322 non-null    object
dtypes: int64(3), object(2)
memory usage: 12.7+ KB


In [5]:
# List the distinct values of every column, one line per column.
# Bracket access is used throughout because `last` is also a DataFrame method name.
for column in ('ctrust', 'cuntrust', 'last', 'context', 'score'):
    print(column, df[column].unique())

ctrust [4 1 2 3]
cuntrust [1 4 3 5]
last [4 1 3 2]
context ['sport' 'game' 'ECommerce' 'holiday']
score ['untrustworthy' 'trustworthy']


**Convert categorical value to binary**

In [6]:
# Encode the target label as 0/1 and assign the result back to the frame.
# `df['score'].replace(..., inplace=True)` acts on a column selected from `df`,
# not on `df` itself; with pandas Copy-on-Write that update never reaches the frame.
# `replace` leaves already-encoded values untouched, so re-running the cell is safe;
# the explicit cast keeps the column an integer dtype.
score_codes = {'untrustworthy': 0, 'trustworthy': 1}
df['score'] = df['score'].replace(score_codes).astype('int64')
df

Unnamed: 0,ctrust,cuntrust,last,context,score
0,4,1,4,sport,0
1,4,1,4,sport,0
2,1,4,4,sport,1
3,2,1,4,sport,1
4,3,1,4,sport,1
...,...,...,...,...,...
317,2,1,2,holiday,0
318,1,1,1,holiday,1
319,1,1,1,holiday,1
320,1,1,1,holiday,1


**Convert categorical value to integral value**

In [7]:
# Encode the context category as integer codes and assign the result back to the frame.
# `df['context'].replace(..., inplace=True)` acts on a column selected from `df`,
# not on `df` itself; with pandas Copy-on-Write that update never reaches the frame.
# `replace` leaves already-encoded values untouched, so re-running the cell is safe;
# the explicit cast keeps the column an integer dtype.
# NOTE(review): the codes 1..4 impose an ordering on what looks like a nominal
# category — confirm this is intended for the models used below.
context_codes = {'sport': 1, 'game': 2, 'ECommerce': 3, 'holiday': 4}
df['context'] = df['context'].replace(context_codes).astype('int64')
df

Unnamed: 0,ctrust,cuntrust,last,context,score
0,4,1,4,1,0
1,4,1,4,1,0
2,1,4,4,1,1
3,2,1,4,1,1
4,3,1,4,1,1
...,...,...,...,...,...
317,2,1,2,4,0
318,1,1,1,4,1
319,1,1,1,4,1
320,1,1,1,4,1


**Split X and Y data**

In [8]:
# Take the target and the features from `df`, the frame the encoding cells
# operate on. Reading them from `data` only works when `data` happens to share
# memory with `df`, which is not guaranteed.
y = df['score'].to_numpy()
x_data = df.drop(columns='score')

Here the output (target) column, `score`, is assigned to the variable `y`, and the remaining input columns are assigned to the variable `x_data`.

**Normalize the data**

In [9]:
# Min-max scale every feature column to the [0, 1] range.
# DataFrame.min()/max() reduce column by column on every pandas version, whereas
# np.min(frame)/np.max(frame) collapse the whole frame to a single scalar on
# pandas >= 2.0, which would scale all columns by one global range.
# NOTE(review): the column ranges are computed on all rows, i.e. before the
# train/test split performed later in the notebook.
col_min = x_data.min()
col_max = x_data.max()
x = (x_data - col_min) / (col_max - col_min)
x

Unnamed: 0,ctrust,cuntrust,last,context
0,1.000000,0.00,1.000000,0.0
1,1.000000,0.00,1.000000,0.0
2,0.000000,0.75,1.000000,0.0
3,0.333333,0.00,1.000000,0.0
4,0.666667,0.00,1.000000,0.0
...,...,...,...,...
317,0.333333,0.00,0.333333,1.0
318,0.000000,0.00,0.000000,1.0
319,0.000000,0.00,0.000000,1.0
320,0.000000,0.00,0.000000,1.0


**Split dataset into training and test dataset**

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=37,
)

**SVM**

In [11]:
from sklearn.svm import SVC

# Fit a linear-kernel SVM on the training split and predict the held-out rows.
svclassifier = SVC(kernel='linear')
y_pred = svclassifier.fit(x_train, y_train).predict(x_test)

# Score the predictions against the held-out labels.
confu_svm = confusion_matrix(y_test, y_pred)
accuracy_svm, precision_svm, recall_svm, f1_svm = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

**Naive Bayes**

In [12]:
from sklearn.naive_bayes import GaussianNB

# Fit Gaussian Naive Bayes on the training split and predict the held-out rows.
nb = GaussianNB()
nb.fit(x_train, y_train)
y_pred = nb.predict(x_test)

# Score the predictions against the held-out labels.
confu_nb = confusion_matrix(y_test, y_pred)
accuracy_nb, precision_nb, recall_nb, f1_nb = (
    accuracy_score(y_test, y_pred),
    precision_score(y_test, y_pred),
    recall_score(y_test, y_pred),
    f1_score(y_test, y_pred),
)

**Comparing SVM and Naive Bayes**

In [13]:
from tabulate import tabulate

# One row per model: label followed by its hold-out metrics.
a, b = 'SVM', 'Naive Bayes'
result1 = (a, accuracy_svm, precision_svm, recall_svm, f1_svm)
result2 = (b, accuracy_nb, precision_nb, recall_nb, f1_nb)
result = (result1, result2)

# Print each confusion matrix under its caption.
for caption, matrix in (
    ('confusion matrix SVM', confu_svm),
    ('confusion matrix Naive Bayes', confu_nb),
):
    print(caption)
    print(matrix)

# Four headers are given for five columns, so the model-label column is left unnamed.
metric_headers = ["accuracy", "precision", "recall", "f1_score"]
print(tabulate(result, headers=metric_headers))

confusion matrix SVM
[[20  7]
 [ 5 49]]
confusion matrix Naive Bayes
[[ 8 19]
 [ 7 47]]
               accuracy    precision    recall    f1_score
-----------  ----------  -----------  --------  ----------
SVM            0.851852     0.875     0.907407    0.890909
Naive Bayes    0.679012     0.712121  0.87037     0.783333


**Ensembling:Stacking**

In [14]:
from numpy import mean
from numpy import std
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import StackingClassifier

Function that builds the stacking ensemble of models

In [15]:
def get_stacking():
    """Build the stacking ensemble used in the comparison.

    The level-0 (base) learners are a default-configured SVC and a GaussianNB;
    their cross-validated predictions (cv=5) feed a LogisticRegression meta-learner.

    Returns:
        An unfitted StackingClassifier.
    """
    base_learners = [
        ('svm', SVC()),
        ('bayes', GaussianNB()),
    ]
    meta_learner = LogisticRegression()
    return StackingClassifier(
        estimators=base_learners,
        final_estimator=meta_learner,
        cv=5,
    )

Function that returns the models to evaluate

In [16]:
def get_models():
    """Return the estimators to compare, keyed by display name.

    The mapping holds the meta-learner on its own, each base learner on its own,
    and the stacked ensemble from get_stacking(), in that insertion order.
    """
    return {
        'Meta Learner LR': LogisticRegression(),
        'Base Learner SVM': SVC(),
        'Base Learner Bayes': GaussianNB(),
        'Final stacking': get_stacking(),
    }

Function that evaluates a given model using cross-validation

In [17]:
def evaluate_model(model):
    """Cross-validate `model` on the notebook-level `x` and `y`.

    Uses stratified 10-fold cross-validation repeated 3 times (seeded with
    random_state=1) and accuracy as the metric.

    Returns:
        The per-fold accuracy scores returned by cross_val_score.
    """
    folds = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    return cross_val_score(model, x, y, scoring='accuracy', cv=folds, n_jobs=1)

In [18]:
# Cross-validate every candidate model and report its mean accuracy.
models = get_models()
results, names = [], []
for name, model in models.items():
    scores = evaluate_model(model)
    names.append(name)
    results.append(scores)
    print('>%s accuracy=%.3f ' % (name, scores.mean()))


>Meta Learner LR accuracy=0.882 
>Base Learner SVM accuracy=1.000 
>Base Learner Bayes accuracy=0.704 
>Final stacking accuracy=0.994 


On the single 75/25 hold-out split, the linear-kernel SVM alone reaches an accuracy of 0.85 and Naive Bayes alone reaches 0.68.

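Under repeated 10-fold cross-validation, the stacked ensemble of these two model families reaches an accuracy of 0.994, well above Naive Bayes alone (0.704) and the logistic-regression meta-learner alone (0.882). Note, however, that the default-kernel SVM used inside the stack scores 1.000 on its own under the same protocol, so stacking does not improve on the best base learner here; the cross-validation figures are also not directly comparable with the hold-out figures above, which come from a different evaluation protocol and a linear-kernel SVM.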
