# Performance measurements of ML algorithms (Supervised and Unsupervised)

In [1]:
#import library packages
import pandas as p
import numpy as n

In [2]:
import warnings
warnings.filterwarnings("ignore") 

In [3]:
#Load given dataset
data = p.read_csv('Corona_NLP_train.csv',encoding='latin1')
df=data.dropna()

In [4]:
df.columns

Index(['UserName', 'ScreenName', 'Location', 'TweetAt', 'OriginalTweet',
       'Sentiment'],
      dtype='object')

array([3, 4, 4, ..., 4, 3, 2])

In [5]:
from sklearn.preprocessing import LabelEncoder
var_mod = ['UserName', 'ScreenName', 'Location', 'TweetAt', 'OriginalTweet','Sentiment']
le = LabelEncoder()
for i in var_mod:
    df[i] = le.fit_transform(df[i]).astype(int)

In [15]:
df

Unnamed: 0,UserName,ScreenName,Location,TweetAt,OriginalTweet,Sentiment
0,0,0,5690,14,4253,3
1,1,1,10282,14,31462,4
2,2,2,10551,14,10101,4
5,3,3,12171,14,7663,4
6,4,4,296,14,9449,4
...,...,...,...,...,...,...
41147,32562,32562,1880,13,31408,2
41149,32563,32563,10114,13,24684,2
41150,32564,32564,7461,13,15581,4
41152,32565,32565,10871,13,6724,3


In [6]:
#According to the cross-validated MCC scores, the random forest is the best-performing model, so now let's evaluate its performance on the test set.
from sklearn.metrics import confusion_matrix, classification_report, matthews_corrcoef, cohen_kappa_score, accuracy_score, average_precision_score, roc_auc_score

In [7]:
X = df.drop(labels='Sentiment', axis=1)
#Response variable
y = df.loc[:,'Sentiment']    

In [17]:
y

0        3
1        4
2        4
5        4
6        4
        ..
41147    2
41149    2
41150    4
41152    3
41156    2
Name: Sentiment, Length: 32567, dtype: int32

In [8]:
#We'll use a test size of 30%. We also stratify the split on the response variable, which is very important to do because there are so few fraudulent transactions.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)

Random Forest:

In [9]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()

rfc.fit(X_train,y_train)
predictR = rfc.predict(X_test)
print("")
x = (accuracy_score(y_test,predictR)*100)
print('Accuracy result of Random Forest: is:', x)
print("")

print("")
print('Classification report of Random Forest: Results:')
print("")

print(classification_report(y_test,predictR))
xrf = (accuracy_score(y_test,predictR)*100)


cm2=confusion_matrix(y_test,predictR)
print('Confusion Matrix result of Random Forest: is:\n', confusion_matrix(y_test,predictR))
print("")

sensitivity1 = cm2[0,0]/(cm2[0,0]+cm2[0,1])
print('Sensitivity : ', sensitivity1 )
print("")
specificity1 = cm2[1,1]/(cm2[1,0]+cm2[1,1])
print('Specificity : ', specificity1)


Accuracy result of Random Forest: is: 23.631153413161396


Classification report of Random Forest: Results:

              precision    recall  f1-score   support

           0       0.14      0.08      0.10      1275
           1       0.18      0.13      0.15      1582
           2       0.24      0.28      0.26      2329
           3       0.20      0.17      0.19      1852
           4       0.28      0.37      0.32      2733

    accuracy                           0.24      9771
   macro avg       0.21      0.21      0.21      9771
weighted avg       0.22      0.24      0.23      9771

Confusion Matrix result of Random Forest: is:
 [[ 101  139  394  205  436]
 [ 112  212  412  239  607]
 [ 201  277  661  349  841]
 [ 130  212  512  320  678]
 [ 197  327  746  448 1015]]

Sensitivity :  0.42083333333333334

Specificity :  0.654320987654321


LogisticRegression

In [10]:
from sklearn.linear_model import LogisticRegression
logR= LogisticRegression()
logR.fit(X_train,y_train)
predictR = logR.predict(X_test)
print("")
x = (accuracy_score(y_test,predictR)*100)
print('Accuracy result of Logistic Regression is:', x)
print("")

print("")
print('Classification report of Logistic Regression : Results:')
print("")

print(classification_report(y_test,predictR))
xl = (accuracy_score(y_test,predictR)*100)


cm2=confusion_matrix(y_test,predictR)
print('Confusion Matrix result of Logistic Regression : is:\n', confusion_matrix(y_test,predictR))
print("")

sensitivity1 = cm2[0,0]/(cm2[0,0]+cm2[0,1])
print('Sensitivity : ', sensitivity1 )
print("")
specificity1 = cm2[1,1]/(cm2[1,0]+cm2[1,1])
print('Specificity : ', specificity1)


Accuracy result of Logistic Regression is: 27.76583768293931


Classification report of Logistic Regression : Results:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1275
           1       0.00      0.00      0.00      1582
           2       0.26      0.05      0.08      2329
           3       0.00      0.00      0.00      1852
           4       0.28      0.95      0.43      2733

    accuracy                           0.28      9771
   macro avg       0.11      0.20      0.10      9771
weighted avg       0.14      0.28      0.14      9771

Confusion Matrix result of Logistic Regression : is:
 [[   0    0   59    0 1216]
 [   0    0   60    0 1522]
 [   0    0  109    0 2220]
 [   0    0   66    0 1786]
 [   0    0  129    0 2604]]

Sensitivity :  nan

Specificity :  nan


In [11]:
def graph():
    import matplotlib.pyplot as plt
    data=[xkn,xkm,xrf,xs,xnb,xd,xl]
    alg="KNN","KM","RF","SVM","NB","DT","LR"
    plt.figure(figsize=(10,5))
    b=plt.bar(alg,data,color=("r","g","b","y","m","black","gray"))
    plt.title("Prediction of Opinion spam using ML",fontsize=15)
    plt.legend(b,data,fontsize=12)
    plt.savefig('comp.png')

In [12]:
graph()

NameError: name 'xkn' is not defined

In [1]:
import tkinter
from matplotlib.backends.backend_tkagg import (FigureCanvasTkAgg, NavigationToolbar2Tk)
from matplotlib.backend_bases import key_press_handler
from matplotlib.figure import Figure
import numpy as np
root = tkinter.Tk()
root.wm_title("Prediction of  using ML")
fig = Figure(figsize=(10,10),dpi=1)
canvas = FigureCanvasTkAgg(fig, master=root)  
canvas.draw()
canvas.get_tk_widget().pack(side=tkinter.TOP, fill=tkinter.BOTH, expand=1)
icon=tkinter.PhotoImage(file='comp.png')   
label=tkinter.Label(root,image=icon)
label.pack()
root.mainloop()