# Test Data (Model and Evaluate)

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression

In [3]:
import pickle

# Load the two previously fitted models: the full model and the one trained on
# text with brand and hardware names removed.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load model files that come from a trusted source.
# The `with` blocks close each file handle, which the bare open() calls never did.
with open('models/trained_model.pickle', 'rb') as model_file:
    trained_model = pickle.load(model_file)
with open('models/trained_model_filtered.pickle', 'rb') as filtered_model_file:
    trained_model_filtered = pickle.load(filtered_model_file)

In [4]:
# Read the test data set from CSV into a DataFrame.
combined_test = pd.read_csv('datasets/combined_test.csv')

In [5]:
# Discard the 'Unnamed: 0' column (presumably a row index saved with the CSV).
# Reassigning with errors='ignore' instead of inplace=True keeps this cell
# re-runnable: the in-place version raised KeyError on a second execution,
# because the column was already gone.
combined_test = combined_test.drop(columns='Unnamed: 0', errors='ignore')
combined_test.head()

Unnamed: 0,cleaned_text,subreddit
0,samsung buy back guranteed buy back program sa...,0
1,kinda neat til little gboard trick select word...,0
2,bose qc ii promotion product documentation hel...,0
3,smartphone camera expect,0
4,pixel teardown alive kinda,0


In [6]:
# Split the frame into model input (cleaned post text) and target (subreddit label).
X_test, y_test = combined_test['cleaned_text'], combined_test['subreddit']

In [7]:
# Accuracy score of the full model on the test data.
trained_model.score(X_test, y_test)

0.944

In [8]:
# Accuracy score on the test data for the model trained with brand and
# hardware names removed.
trained_model_filtered.score(X_test, y_test)

0.888

### Model Score

We loaded two trained models: the full model, and a second model trained on data with brand and hardware names removed.  
As expected, the full model performed better (94.4% accuracy), but the filtered model also did quite well, with 88.8% accuracy on the same test data.

In [9]:
# Predicted labels from the full model, one per row of the test data.
preds = trained_model.predict(X_test)

In [10]:
# Peek at the first ten predictions as a quick sanity check.
preds[:10]

array([0, 1, 0, 0, 0, 1, 0, 1, 0, 0])

In [11]:
# Confusion matrix: rows are the actual labels, columns the predicted labels.
confusion_matrix(y_test, preds)

array([[230,  20],
       [  8, 242]])

In [12]:
# Flatten the 2x2 confusion matrix into its four cells and keep the
# conventional short names available for later cells.
cell_counts = confusion_matrix(y_test, preds).ravel()
tn, fp, fn, tp = cell_counts

# Report each count under its label, in the same order as the flattened matrix.
cell_labels = ("True Negatives", "False Positives", "False Negatives", "True Positives")
for cell_label, cell_count in zip(cell_labels, cell_counts):
    print("%s: %s" % (cell_label, cell_count))

True Negatives: 230
False Positives: 20
False Negatives: 8
True Positives: 242


In [13]:
# Store the predictions as a new column so each row holds both the actual
# label ('subreddit') and the predicted one ('preds').
combined_test['preds'] = preds

In [14]:
# Boolean mask: True wherever the predicted label differs from the actual label.
misclassified = combined_test['preds'].ne(combined_test['subreddit'])

In [15]:
# Pull a few misclassified rows for manual inspection.
# random_state pins the draw so Restart & Run All shows the same rows every
# time (the unseeded sample changed on each run). The min() guard avoids the
# ValueError that sample() raises when fewer than 5 rows are misclassified.
misclass_rows = combined_test[misclassified]
misclass_sample = misclass_rows.sample(n=min(5, len(misclass_rows)), random_state=42)
misclass_sample

Unnamed: 0,cleaned_text,subreddit,preds
54,spotify publicly testing version story,0,1
1,kinda neat til little gboard trick select word...,0,1
287,sqrd new app generates colorful qr code photo ...,1,0
148,various conflicting opinion best practice main...,0,1
269,message displayed home screen carrier google k...,1,0
