In [2]:
import pandas as pd
import numpy as np
from string import punctuation
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import re

In [3]:
data = pd.read_csv("/Users/varunnegandhi/Documents/Data Mining/IMDB Dataset.csv", encoding='latin1')
data['SentimentText'] = data['SentimentText'].str.lower()
data.head()

Unnamed: 0,SentimentText,Sentiment
0,one of the other reviewers has mentioned that ...,positive
1,a wonderful little production. <br /><br />the...,positive
2,i thought this was a wonderful way to spend ti...,positive
3,basically there's a family where a little boy ...,negative
4,"petter mattei's ""love in the time of money"" is...",positive


In [4]:
for i in range(len(data)):
    data['SentimentText'][i] = re.sub('<.*?>', '', data['SentimentText'][i])
    data['SentimentText'][i] = ''.join(ch for ch in data['SentimentText'][i] if ch not in punctuation)

In [5]:
data['Sentiment'] = data['Sentiment'].map({'positive': 1, 'negative': 0})
data.head()

Unnamed: 0,SentimentText,Sentiment
0,one of the other reviewers has mentioned that ...,1
1,a wonderful little production the filming tech...,1
2,i thought this was a wonderful way to spend ti...,1
3,basically theres a family where a little boy j...,0
4,petter matteis love in the time of money is a ...,1


In [6]:
data_x = data['SentimentText']
data_y = data['Sentiment']
cv = TfidfVectorizer(min_df=1, stop_words='english',max_features=5000)
x_train, x_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.2, random_state=4)
print(x_train.head())
print(x_test.head())
cv.fit(x_train)
train_vectors = cv.transform(x_train)
test_vectors = cv.transform(x_test)
print(train_vectors)
print(test_vectors)

16880    despite some moments in heavy rain an encounte...
18337    a fabulous filmwhich i have now watched severa...
11170    creepshow 2 is little more than a pale imitati...
17714    a pretty worthless made for television movie t...
28539    i think i read this someplace joe johnston dir...
Name: SentimentText, dtype: object
16477    at first look of the plot tagline i figured it...
5969     good lord how this ended up in our dvd player ...
46459    two sorcerers battle in the fourth dimensionon...
33297    caligula shares many of the same attributes as...
35047    imagine the big chill with a cast of twentysom...
Name: SentimentText, dtype: object
  (0, 4993)	0.10195110290258493
  (0, 4956)	0.09366153833369903
  (0, 4645)	0.15497335406626048
  (0, 4512)	0.08786757816554114
  (0, 4459)	0.08471884389782676
  (0, 4337)	0.14313494911324562
  (0, 4224)	0.16031826254701523
  (0, 4220)	0.1533308543619963
  (0, 4195)	0.12428343408210535
  (0, 4163)	0.16937165947774815
  (0, 4063)	0.1511738

In [7]:
print(x_train.shape)
print(type(x_train))
print(x_test.shape)
print("train vector:")
#print(train_vectors)
print("test vector")
#print(test_vectors)
print(train_vectors.shape)
print(test_vectors.shape)

(40000,)
<class 'pandas.core.series.Series'>
(10000,)
train vector:
test vector
(40000, 5000)
(10000, 5000)


In [38]:
classifier_linear = svm.SVC(kernel='linear')
classifier_linear.fit(train_vectors, y_train)
prediction_linear = classifier_linear.predict(test_vectors)
report = classification_report(y_test, prediction_linear, output_dict=True)
print(report)

{'0': {'precision': 0.8900651465798045, 'recall': 0.8681493248610008, 'f1-score': 0.8789706473663049, 'support': 5036}, '1': {'precision': 0.8694968553459119, 'recall': 0.8912167606768735, 'f1-score': 0.8802228412256268, 'support': 4964}, 'accuracy': 0.8796, 'macro avg': {'precision': 0.8797810009628582, 'recall': 0.8796830427689372, 'f1-score': 0.8795967442959658, 'support': 10000}, 'weighted avg': {'precision': 0.8798550468113002, 'recall': 0.8796, 'f1-score': 0.8795922363980724, 'support': 10000}}


In [42]:
review = """It's not even 5 days since i purchased this product.
I would say this a specially blended worst Phone in all formats.
ISSUE 1:
Have you ever heard of phone which gets drained even in standby mode during night?
Kindly please see the screenshot if you want to believe my statement.
My phone was in full charge at night 10:07 PM . I took this screenshot and went to sleep.
Then I woke up at morning and 6:35 AM and battery got drained by 56% in just standby condition.
If this is the case consider how many hours it will work, during day time.
It's not even 5 hours the battery is able to withstand.
ISSUE 2:
Apart from the battery, the next issue is the heating issue .I purchased a iron box recently from Bajaj in this sale.
But I realized this phone acts a very good Iron box than the Bajaj Iron box. I am using only my headphones to get connected in the call. I am not sure when this phone is will get busted due to this heating issue. It is definitely a challenge to hold this phone for even 1 minute. The heat that the phone is causing will definitely burn your hands and for man if you keep this phone in your pant pocket easily this will lead to infertility for you. Kindly please be aware about that.
Issue 3:
Even some unknown brands has a better touch sensitivity. The touch sensitivity is pathetic, if perform some operation it will easily take 1-2 minutes for the phone to response.
For your kind information my system has 73% of Memory free and the RAM is also 56% free.
Kindly please make this Review famous and lets make everyone aware of this issue with this phone.
Let's save people from buying this phone. There are people who don't even know what to do if this issue happens after 10 days from the date of purchase. So I feel at least this review will help people from purchasing this product in mere future."""
review_vector = cv.transform([review]) # vectorizing
print(classifier_linear.predict(review_vector))

[0]


In [44]:
data_test = pd.read_csv("/Users/varunnegandhi/Documents/Data Mining/dataset.csv", encoding='latin1')
data_test['SentimentText'] = data_test['SentimentText'].str.lower()
data_test.head()

Unnamed: 0,SentimentText,Sentiment
0,"first think another disney movie, might good, ...",1
1,"put aside dr. house repeat missed, desperate h...",0
2,"big fan stephen king's work, film made even gr...",1
3,watched horrid thing tv. needless say one movi...,0
4,truly enjoyed film. acting terrific plot. jeff...,1


In [46]:
data_test_x = data_test['SentimentText']
data_test_y = data_test['Sentiment']
test1_vector = cv.transform(data_test_x)

In [47]:
prediction_linear_test = classifier_linear.predict(test1_vector)
report = classification_report(data_test_y, prediction_linear_test, output_dict=True)
print(report)

{'0': {'precision': 0.9186277759363606, 'recall': 0.88688, 'f1-score': 0.902474763920547, 'support': 12500}, '1': {'precision': 0.8906588308072997, 'recall': 0.92144, 'f1-score': 0.9057879836426549, 'support': 12500}, 'accuracy': 0.90416, 'macro avg': {'precision': 0.9046433033718302, 'recall': 0.9041600000000001, 'f1-score': 0.904131373781601, 'support': 25000}, 'weighted avg': {'precision': 0.9046433033718301, 'recall': 0.90416, 'f1-score': 0.9041313737816008, 'support': 25000}}


In [48]:
classifier_linear = svm.SVC(kernel='poly')
classifier_linear.fit(train_vectors, y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='poly', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [49]:
prediction_poly = classifier_linear.predict(test_vectors)
report = classification_report(y_test, prediction_poly, output_dict=True)
print(report)

{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5036}, '1': {'precision': 0.4964, 'recall': 1.0, 'f1-score': 0.6634589681903235, 'support': 4964}, 'accuracy': 0.4964, 'macro avg': {'precision': 0.2482, 'recall': 0.5, 'f1-score': 0.33172948409516173, 'support': 10000}, 'weighted avg': {'precision': 0.24641296000000001, 'recall': 0.4964, 'f1-score': 0.3293410318096766, 'support': 10000}}


  'precision', 'predicted', average, warn_for)


In [50]:
classifier_rbf = svm.SVC(kernel='rbf')
classifier_rbf.fit(train_vectors, y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [51]:
prediction_rbf = classifier_rbf.predict(test_vectors)
report = classification_report(y_test, prediction_rbf, output_dict=True)
print(report)

{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5036}, '1': {'precision': 0.4964, 'recall': 1.0, 'f1-score': 0.6634589681903235, 'support': 4964}, 'accuracy': 0.4964, 'macro avg': {'precision': 0.2482, 'recall': 0.5, 'f1-score': 0.33172948409516173, 'support': 10000}, 'weighted avg': {'precision': 0.24641296000000001, 'recall': 0.4964, 'f1-score': 0.3293410318096766, 'support': 10000}}


  'precision', 'predicted', average, warn_for)


In [52]:
classifier_sigmoid = svm.SVC(kernel='sigmoid')
classifier_sigmoid.fit(train_vectors, y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='sigmoid', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [9]:
data_test = pd.read_csv("/Users/varunnegandhi/Documents/Data Mining/dataset.csv", encoding='latin1')
data_test['SentimentText'] = data_test['SentimentText'].str.lower()
for i in range(len(data_test)):
    data_test['SentimentText'][i] = re.sub('<.*?>', '', data_test['SentimentText'][i])
    data_test['SentimentText'][i] = ''.join(ch for ch in data_test['SentimentText'][i] if ch not in punctuation)
data_test.head()
for n in range(110, 151, 10):
    print("Cost: ",n)
    print("Kernel: ",'linear')
    classifier_linear = svm.SVC(kernel='linear',C=n)
    classifier_linear.fit(train_vectors, y_train)
    prediction_linear = classifier_linear.predict(test_vectors)
    report = classification_report(y_test, prediction_linear, output_dict=True)
    print("Training Data:")
    print(report)
    print("Precision: ",report['weighted avg']['precision'])
    print("Recall: ",report['weighted avg']['recall'])
    print("F1-Score: ",report['weighted avg']['f1-score'])
    data_test_x = data_test['SentimentText']
    data_test_y = data_test['Sentiment']
    test1_vector = cv.transform(data_test_x)
    prediction_linear_test = classifier_linear.predict(test1_vector)
    report = classification_report(data_test_y, prediction_linear_test, output_dict=True)
    print("Test Data:")
    print(report)
    print("Precision: ",report['weighted avg']['precision'])
    print("Recall: ",report['weighted avg']['recall'])
    print("F1-Score: ",report['weighted avg']['f1-score'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Cost:  110
Kernel:  linear
Training Data:
{'0': {'precision': 0.8667883211678832, 'recall': 0.8488880063542494, 'f1-score': 0.857744783306581, 'support': 5036}, '1': {'precision': 0.8498421468034728, 'recall': 0.8676470588235294, 'f1-score': 0.8586523125996812, 'support': 4964}, 'accuracy': 0.8582, 'macro avg': {'precision': 0.858315233985678, 'recall': 0.8582675325888894, 'f1-score': 0.8581985479531311, 'support': 10000}, 'weighted avg': {'precision': 0.8583762402133898, 'recall': 0.8582, 'f1-score': 0.8581952808476759, 'support': 10000}}
Precision:  0.8583762402133898
Recall:  0.8582
F1-Score:  0.8581952808476759
Test Data:
{'0': {'precision': 0.9287862775384115, 'recall': 0.914, 'f1-score': 0.9213338171847909, 'support': 12500}, '1': {'precision': 0.9153476651704858, 'recall': 0.92992, 'f1-score': 0.9225762927100282, 'support': 12500}, 'accuracy': 0.92196, 'macro avg': {'precision': 0.9220669713544487, 'recall': 0.92196, 'f1-score': 0.9219550549474096, 'support': 25000}, 'weighted a

In [10]:
for n in range(110, 151, 10):
    for k in ['rbf','sigmoid']:        
        print("Cost: ",n)
        print("Kernel: ",k)
        classifier_linear = svm.SVC(kernel=k,C=n)
        classifier_linear.fit(train_vectors, y_train)
        prediction_linear = classifier_linear.predict(test_vectors)
        report = classification_report(y_test, prediction_linear, output_dict=True)
        print("Training Data:")
        print(report)
        print("Precision: ",report['weighted avg']['precision'])
        print("Recall: ",report['weighted avg']['recall'])
        print("F1-Score: ",report['weighted avg']['f1-score'])
        data_test_x = data_test['SentimentText']
        data_test_y = data_test['Sentiment']
        test1_vector = cv.transform(data_test_x)
        prediction_linear_test = classifier_linear.predict(test1_vector)
        report = classification_report(data_test_y, prediction_linear_test, output_dict=True)
        print("Test Data:")
        print(report)
        print("Precision: ",report['weighted avg']['precision'])
        print("Recall: ",report['weighted avg']['recall'])
        print("F1-Score: ",report['weighted avg']['f1-score'])

Cost:  110
Kernel:  rbf




Training Data:
{'0': {'precision': 0.8970745248772155, 'recall': 0.8341938046068308, 'f1-score': 0.8644922317110815, 'support': 5036}, '1': {'precision': 0.842956554447997, 'recall': 0.90290088638195, 'f1-score': 0.8718996206594689, 'support': 4964}, 'accuracy': 0.8683, 'macro avg': {'precision': 0.8700155396626063, 'recall': 0.8685473454943904, 'f1-score': 0.8681959261852752, 'support': 10000}, 'weighted avg': {'precision': 0.8702103643561513, 'recall': 0.8683, 'f1-score': 0.8681692595850611, 'support': 10000}}
Precision:  0.8702103643561513
Recall:  0.8683
F1-Score:  0.8681692595850611
Test Data:
{'0': {'precision': 0.9115186042371373, 'recall': 0.8192, 'f1-score': 0.862897109631752, 'support': 12500}, '1': {'precision': 0.8358274008426558, 'recall': 0.92048, 'f1-score': 0.8761136069443387, 'support': 12500}, 'accuracy': 0.86984, 'macro avg': {'precision': 0.8736730025398965, 'recall': 0.86984, 'f1-score': 0.8695053582880454, 'support': 25000}, 'weighted avg': {'precision': 0.8736730



Training Data:
{'0': {'precision': 0.8950548221078541, 'recall': 0.7942811755361397, 'f1-score': 0.8416622830089426, 'support': 5036}, '1': {'precision': 0.8126920990779244, 'recall': 0.9055197421434327, 'f1-score': 0.8565983801810385, 'support': 4964}, 'accuracy': 0.8495, 'macro avg': {'precision': 0.8538734605928893, 'recall': 0.8499004588397863, 'f1-score': 0.8491303315949905, 'support': 10000}, 'weighted avg': {'precision': 0.854169966395797, 'recall': 0.8495, 'f1-score': 0.849076561645171, 'support': 10000}}
Precision:  0.854169966395797
Recall:  0.8495
F1-Score:  0.849076561645171
Test Data:
{'0': {'precision': 0.9113757854262403, 'recall': 0.77744, 'f1-score': 0.8390968354703622, 'support': 12500}, '1': {'precision': 0.8059566157494594, 'recall': 0.9244, 'f1-score': 0.8611245668293773, 'support': 12500}, 'accuracy': 0.85092, 'macro avg': {'precision': 0.8586662005878498, 'recall': 0.85092, 'f1-score': 0.8501107011498698, 'support': 25000}, 'weighted avg': {'precision': 0.8586662



Training Data:
{'0': {'precision': 0.897933091838909, 'recall': 0.8367752184273233, 'f1-score': 0.8662760818172475, 'support': 5036}, '1': {'precision': 0.8451102317693612, 'recall': 0.9035052377115229, 'f1-score': 0.8733326842566449, 'support': 4964}, 'accuracy': 0.8699, 'macro avg': {'precision': 0.871521661804135, 'recall': 0.870140228069423, 'f1-score': 0.8698043830369462, 'support': 10000}, 'weighted avg': {'precision': 0.8717118241003855, 'recall': 0.8699, 'f1-score': 0.8697789792681644, 'support': 10000}}
Precision:  0.8717118241003855
Recall:  0.8699
F1-Score:  0.8697789792681644
Test Data:
{'0': {'precision': 0.9125877231944568, 'recall': 0.82184, 'f1-score': 0.8648398366797154, 'support': 12500}, '1': {'precision': 0.8379538674234156, 'recall': 0.92128, 'f1-score': 0.8776435620927486, 'support': 12500}, 'accuracy': 0.87156, 'macro avg': {'precision': 0.8752707953089363, 'recall': 0.87156, 'f1-score': 0.871241699386232, 'support': 25000}, 'weighted avg': {'precision': 0.875270



Training Data:
{'0': {'precision': 0.8965747330960854, 'recall': 0.8004368546465449, 'f1-score': 0.8457826269408308, 'support': 5036}, '1': {'precision': 0.817405523255814, 'recall': 0.9063255439161966, 'f1-score': 0.8595720290408865, 'support': 4964}, 'accuracy': 0.853, 'macro avg': {'precision': 0.8569901281759497, 'recall': 0.8533811992813707, 'f1-score': 0.8526773279908586, 'support': 10000}, 'weighted avg': {'precision': 0.8572751373313746, 'recall': 0.853, 'f1-score': 0.8526276861432983, 'support': 10000}}
Precision:  0.8572751373313746
Recall:  0.853
F1-Score:  0.8526276861432983
Test Data:
{'0': {'precision': 0.9118057493720346, 'recall': 0.78408, 'f1-score': 0.8431330379801281, 'support': 12500}, '1': {'precision': 0.8106097817697003, 'recall': 0.92416, 'f1-score': 0.863668647901013, 'support': 12500}, 'accuracy': 0.85412, 'macro avg': {'precision': 0.8612077655708674, 'recall': 0.85412, 'f1-score': 0.8534008429405706, 'support': 25000}, 'weighted avg': {'precision': 0.8612077



Training Data:
{'0': {'precision': 0.8975993201614616, 'recall': 0.8389594916600477, 'f1-score': 0.8672893359334907, 'support': 5036}, '1': {'precision': 0.846778764405819, 'recall': 0.90290088638195, 'f1-score': 0.8739397484644633, 'support': 4964}, 'accuracy': 0.8707, 'macro avg': {'precision': 0.8721890422836402, 'recall': 0.8709301890209988, 'f1-score': 0.870614542198977, 'support': 10000}, 'weighted avg': {'precision': 0.8723719962843607, 'recall': 0.8707, 'f1-score': 0.8705906007138655, 'support': 10000}}
Precision:  0.8723719962843607
Recall:  0.8707
F1-Score:  0.8705906007138655
Test Data:
{'0': {'precision': 0.9116867043847242, 'recall': 0.82504, 'f1-score': 0.8662019150008399, 'support': 12500}, '1': {'precision': 0.8402250146113384, 'recall': 0.92008, 'f1-score': 0.8783412249885444, 'support': 12500}, 'accuracy': 0.87256, 'macro avg': {'precision': 0.8759558594980312, 'recall': 0.87256, 'f1-score': 0.8722715699946921, 'support': 25000}, 'weighted avg': {'precision': 0.875955



Training Data:
{'0': {'precision': 0.8966659306690219, 'recall': 0.8063939634630659, 'f1-score': 0.8491374803972818, 'support': 5036}, '1': {'precision': 0.8217876073843904, 'recall': 0.9057211925866236, 'f1-score': 0.8617153809295638, 'support': 4964}, 'accuracy': 0.8557, 'macro avg': {'precision': 0.8592267690267061, 'recall': 0.8560575780248447, 'f1-score': 0.8554264306634227, 'support': 10000}, 'weighted avg': {'precision': 0.8594963309905308, 'recall': 0.8557, 'f1-score': 0.8553811502215067, 'support': 10000}}
Precision:  0.8594963309905308
Recall:  0.8557
F1-Score:  0.8553811502215067
Test Data:
{'0': {'precision': 0.9111111111111111, 'recall': 0.79048, 'f1-score': 0.846519597344185, 'support': 12500}, '1': {'precision': 0.8149770399152243, 'recall': 0.92288, 'f1-score': 0.8655786906771713, 'support': 12500}, 'accuracy': 0.85668, 'macro avg': {'precision': 0.8630440755131676, 'recall': 0.85668, 'f1-score': 0.8560491440106781, 'support': 25000}, 'weighted avg': {'precision': 0.863



Training Data:
{'0': {'precision': 0.8973327688399662, 'recall': 0.8417394757744241, 'f1-score': 0.8686475409836065, 'support': 5036}, '1': {'precision': 0.8489385898407885, 'recall': 0.9022965350523772, 'f1-score': 0.8748046875, 'support': 4964}, 'accuracy': 0.8718, 'macro avg': {'precision': 0.8731356793403773, 'recall': 0.8720180054134006, 'f1-score': 0.8717261142418032, 'support': 10000}, 'weighted avg': {'precision': 0.8733098983847744, 'recall': 0.8718, 'f1-score': 0.8717039485143443, 'support': 10000}}
Precision:  0.8733098983847744
Recall:  0.8718
F1-Score:  0.8717039485143443
Test Data:
{'0': {'precision': 0.9120028159098909, 'recall': 0.82912, 'f1-score': 0.8685886691250418, 'support': 12500}, '1': {'precision': 0.8433558228219419, 'recall': 0.92, 'f1-score': 0.8800122436486073, 'support': 12500}, 'accuracy': 0.87456, 'macro avg': {'precision': 0.8776793193659164, 'recall': 0.87456, 'f1-score': 0.8743004563868246, 'support': 25000}, 'weighted avg': {'precision': 0.87767931936



Training Data:
{'0': {'precision': 0.8956597983340641, 'recall': 0.8113582208101668, 'f1-score': 0.8514273807043136, 'support': 5036}, '1': {'precision': 0.8253034203751379, 'recall': 0.9041095890410958, 'f1-score': 0.8629109786579504, 'support': 4964}, 'accuracy': 0.8574, 'macro avg': {'precision': 0.860481609354601, 'recall': 0.8577339049256314, 'f1-score': 0.857169179681132, 'support': 10000}, 'weighted avg': {'precision': 0.8607348923152531, 'recall': 0.8574, 'f1-score': 0.8571278387284991, 'support': 10000}}
Precision:  0.8607348923152531
Recall:  0.8574
F1-Score:  0.8571278387284991
Test Data:
{'0': {'precision': 0.9111845123405817, 'recall': 0.79448, 'f1-score': 0.8488396940040174, 'support': 12500}, '1': {'precision': 0.8178143394085526, 'recall': 0.92256, 'f1-score': 0.8670350738694035, 'support': 12500}, 'accuracy': 0.85852, 'macro avg': {'precision': 0.8644994258745672, 'recall': 0.85852, 'f1-score': 0.8579373839367104, 'support': 25000}, 'weighted avg': {'precision': 0.8644



Training Data:
{'0': {'precision': 0.8972515856236787, 'recall': 0.8427323272438443, 'f1-score': 0.8691378251075158, 'support': 5036}, '1': {'precision': 0.8497153700189753, 'recall': 0.9020950846091862, 'f1-score': 0.8751221418800078, 'support': 4964}, 'accuracy': 0.8722, 'macro avg': {'precision': 0.873483477821327, 'recall': 0.8724137059265152, 'f1-score': 0.8721299834937618, 'support': 10000}, 'weighted avg': {'precision': 0.8736546081975041, 'recall': 0.8722, 'f1-score': 0.8721084399533807, 'support': 10000}}
Precision:  0.8736546081975041
Recall:  0.8722
F1-Score:  0.8721084399533807
Test Data:
{'0': {'precision': 0.9118575688475706, 'recall': 0.83176, 'f1-score': 0.869969040247678, 'support': 12500}, '1': {'precision': 0.8453449036623033, 'recall': 0.9196, 'f1-score': 0.8809104145911565, 'support': 12500}, 'accuracy': 0.87568, 'macro avg': {'precision': 0.8786012362549369, 'recall': 0.87568, 'f1-score': 0.8754397274194172, 'support': 25000}, 'weighted avg': {'precision': 0.87860



Training Data:
{'0': {'precision': 0.8949314770502501, 'recall': 0.8169181890389198, 'f1-score': 0.8541472023253399, 'support': 5036}, '1': {'precision': 0.8293540625578383, 'recall': 0.9026994359387591, 'f1-score': 0.8644738111314748, 'support': 4964}, 'accuracy': 0.8595, 'macro avg': {'precision': 0.8621427698040443, 'recall': 0.8598088124888394, 'f1-score': 0.8593105067284074, 'support': 10000}, 'weighted avg': {'precision': 0.8623788484962169, 'recall': 0.8595, 'f1-score': 0.8592733309367053, 'support': 10000}}
Precision:  0.8623788484962169
Recall:  0.8595
F1-Score:  0.8592733309367053
Test Data:
{'0': {'precision': 0.9114559561843907, 'recall': 0.7988, 'f1-score': 0.8514176081858879, 'support': 12500}, '1': {'precision': 0.8209327162691349, 'recall': 0.9224, 'f1-score': 0.8687135053682425, 'support': 12500}, 'accuracy': 0.8606, 'macro avg': {'precision': 0.8661943362267628, 'recall': 0.8606, 'f1-score': 0.8600655567770652, 'support': 25000}, 'weighted avg': {'precision': 0.866194