# Evaluating Calliope (v 0.1.2)
This notebook allows us to evaluate Calliope and output relevant metrics for documentation.

In [1]:
import string
import random
import nltk
import pickle5 as pickle
import pandas as pd

# Fixed seed so the shuffle below (and any later draw from the global `random`
# generator) gives the same result on every Restart & Run All.
SEED = 42
random.seed(SEED)

df = pd.read_csv('../data/combined_ms.csv')
data = df.to_numpy()

# Translation table that deletes every ASCII punctuation character.
# Built once and reused for all rows.
punctuation_table = str.maketrans('', '', string.punctuation)

documents = []       # (list of tokens, label) pairs, one per CSV row
cleaned_texts = []   # punctuation-free text of every row, in CSV order

for point in data:
    # Columns are read positionally: point[0] is the text, point[1] the label.
    cleaned = point[0].translate(punctuation_table)
    documents.append((cleaned.split(), point[1]))
    cleaned_texts.append(cleaned)

# NOTE(review): the texts are joined with no separator, so the last word of one
# row fuses with the first word of the next. Kept as-is because changing it
# changes `word_features`; presumably the pickled model was trained with the
# same vocabulary -- confirm against the training code before fixing.
word_string = ''.join(cleaned_texts)

random.shuffle(documents)

all_words = nltk.FreqDist(w.lower() for w in word_string.split())

# NOTE(review): in NLTK 3 a FreqDist iterates in insertion order, so this takes
# the first 5 distinct words seen, not the 5 most frequent ones
# (`all_words.most_common(5)` would). Left unchanged so the feature names stay
# the ones the pickled model presumably expects -- TODO confirm.
word_features = list(all_words)[:5]

def document_features(document, vocabulary=None):
    """Build the boolean bag-of-words feature dict for one tokenised document.

    Args:
        document: iterable of word tokens (e.g. the result of ``str.split()``).
        vocabulary: iterable of words to test for. Defaults to the module-level
            ``word_features`` list when omitted, which keeps existing
            one-argument calls working unchanged.

    Returns:
        dict mapping ``'contains(<word>)'`` to ``True`` when ``<word>`` occurs
        in ``document`` and ``False`` otherwise, one entry per vocabulary word.
    """
    if vocabulary is None:
        vocabulary = word_features

    # Set membership makes each lookup O(1) instead of scanning the token list.
    document_words = set(document)

    # NOTE(review): the membership test is case-sensitive. `word_features` is
    # built from lower-cased words, while callers in this notebook pass tokens
    # in their original case, so e.g. 'This' never matches 'this'. Left as-is
    # in case the pickled model was trained the same way -- confirm.
    return {
        'contains({})'.format(word): (word in document_words)
        for word in vocabulary
    }

# Load pickled model:
# NOTE: unpickling can execute arbitrary code, so only load a pickle file that
# comes from a trusted source.
# The `with` block closes the file handle as soon as the model is loaded.
with open('../calliope.pickle', 'rb') as model_file:
    calliope = pickle.load(model_file)

In [None]:
# Ask for text input to classify:
test_banner = input("Input browser banner text: ")

# Strip punctuation before tokenising, the same way the dataset banners are
# tokenised; otherwise a token such as "cookies." can never match "cookies".
banner_tokens = test_banner.translate(str.maketrans('', '', string.punctuation)).split()

print("'"+ test_banner + "' is " + calliope.classify(document_features(banner_tokens)))

## Validation
Testing using data that was part of the training data set.

In [12]:
# Creating validation set

print("Validation set:")

# Draw 20 distinct (tokens, label) documents at random to validate against.
val_samples = random.sample(documents, 20)

# One sample per output line.
print("\n".join(str(sample) for sample in val_samples))

Validation set:
(['Motor', 'Sport', 'uses', 'cookies', 'in', 'order', 'to', 'deliver', 'a', 'personalised', 'responsive', 'service', 'and', 'to', 'improve', 'the', 'site', 'We', 'remember', 'and', 'store', 'information', 'about', 'how', 'you', 'use', 'it', 'using', 'simple', 'text', 'files', 'called', 'cookies', 'which', 'sit', 'on', 'your', 'computer', 'which', 'are', 'completely', 'safe', 'and', 'secure', 'How', 'to', 'manage', 'cookies', 'Read', 'our', 'Privacy', 'Policy', 'to', 'give', 'you', 'more', 'detailed', 'information', 'about', 'your', 'rights', 'and', 'how', 'we', 'use', 'your', 'data'], 'GOOD')
(['This', 'site', 'uses', 'cookies', 'to', 'ensure', 'that', 'we', 'deliver', 'you', 'the', 'best', 'user', 'experience', 'By', 'continuing', 'to', 'browse', 'the', 'site', 'you', 'are', 'agreeing', 'to', 'our', 'use', 'of', 'cookies', 'For', 'more', 'information', 'please', 'see', 'our', 'COOKIE', 'POLICY'], 'BAD')
(['Like', 'most', 'websites', 'Channel', '4', 'uses', 'cookies', '

In [16]:
# Testing with validation set

from nltk.metrics import ConfusionMatrix

# Pair each validation document's feature dict with its reference label.
val_set = [(document_features(tokens), label) for tokens, label in val_samples]

val_accuracy = nltk.classify.accuracy(calliope, val_set)
print("ACCURACY (Validation):", val_accuracy * 100)

print("\nCONFUSION MATRIX (Validation):")
# Rows of the matrix are the reference labels, columns the model's predictions.
val_reference = [label for features, label in val_set]
val_predicted = [calliope.classify(features) for features, label in val_set]
print(ConfusionMatrix(val_reference, val_predicted))

ACCURACY (Validation): 90.0

CONFUSION MATRIX (Validation):
     |   G |
     | B O |
     | A O |
     | D D |
-----+-----+
 BAD |<9>1 |
GOOD | 1<9>|
-----+-----+
(row = reference; col = test)



## Testing

Testing using data not from the training data set.

In [19]:
# Creating test set

# Hand-labelled (banner text, label) pairs used as the test set.
test_samples = [
    ("Rappler uses cookies to customize user experience. By continued use, you agree to our privacy policy and accept our use of such cookies.", "GOOD"),
    ("We use cookies to ensure you get the best browsing experience. By continued use, you agree to our privacy policy and accept our use of such cookies. For further information, find out more.", "GOOD"),
    ("We use cookies to ensure you get the best experience on our website. By continuing, you are agreeing to our use of cookies. To find out more, please click this link.", "GOOD"),
    ("This site uses cookies. By continuing to browse the site, you are agreeing to our use of cookies.", "BAD"),
    ("This website uses cookies. By continuing to browse the website, you are agreeing to our use of cookies. Read More.", "BAD"),
    ("Welcome, Kapamilya! We use cookies to improve your browsing experience. Continuing to use this site means you agree to our use of cookies. Tell me more!", "GOOD"),
    ("We use cookies to ensure you get the best experience on PEP.ph. By continued use, you agree to our privacy policy and accept our use of such cookies. Find out more here.", "GOOD"),
    ("We use cookies to ensure we give you the best experience on our website.", "BAD"),
    ("Just so you know, we share cookies BeautyMNl uses cookies to give you the best shopping experience. Read on for our Privacy Policy", "BAD"),
    ("The IKEA website uses cookies, which make the site simpler to use. Find out more about browser cookies.", "BAD"),
]

# Translation table that deletes every ASCII punctuation character.
strip_punctuation = str.maketrans('', '', string.punctuation)

# NOTE(review): `word_string` is rebuilt here (texts joined with no separator),
# but no later visible cell reads it -- confirm before removing.
word_string = ''
test_docs = []

for banner_text, banner_label in test_samples:
    cleaned_text = banner_text.translate(strip_punctuation)
    test_docs.append((cleaned_text.split(), banner_label))
    word_string += cleaned_text

print("Testing set:")
# One (tokens, label) document per output line.
print("\n".join(str(doc) for doc in test_docs))

Testing set:
(['Rappler', 'uses', 'cookies', 'to', 'customize', 'user', 'experience', 'By', 'continued', 'use', 'you', 'agree', 'to', 'our', 'privacy', 'policy', 'and', 'accept', 'our', 'use', 'of', 'such', 'cookies'], 'GOOD')
(['We', 'use', 'cookies', 'to', 'ensure', 'you', 'get', 'the', 'best', 'browsing', 'experience', 'By', 'continued', 'use', 'you', 'agree', 'to', 'our', 'privacy', 'policy', 'and', 'accept', 'our', 'use', 'of', 'such', 'cookies', 'For', 'further', 'information', 'find', 'out', 'more'], 'GOOD')
(['We', 'use', 'cookies', 'to', 'ensure', 'you', 'get', 'the', 'best', 'experience', 'on', 'our', 'website', 'By', 'continuing', 'you', 'are', 'agreeing', 'to', 'our', 'use', 'of', 'cookies', 'To', 'find', 'out', 'more', 'please', 'click', 'this', 'link'], 'GOOD')
(['This', 'site', 'uses', 'cookies', 'By', 'continuing', 'to', 'browse', 'the', 'site', 'you', 'are', 'agreeing', 'to', 'our', 'use', 'of', 'cookies'], 'BAD')
(['This', 'website', 'uses', 'cookies', 'By', 'continui

In [20]:
# Testing with test set

from nltk.metrics import ConfusionMatrix

# Pair each test document's feature dict with its reference label.
test_set = [(document_features(tokens), label) for tokens, label in test_docs]

test_accuracy = nltk.classify.accuracy(calliope, test_set)
print("ACCURACY (Testing):", test_accuracy * 100)

print("\nCONFUSION MATRIX (Testing):")
# Rows of the matrix are the reference labels, columns the model's predictions.
test_reference = [label for features, label in test_set]
test_predicted = [calliope.classify(features) for features, label in test_set]
print(ConfusionMatrix(test_reference, test_predicted))

ACCURACY (Testing): 80.0

CONFUSION MATRIX (Testing):
     |   G |
     | B O |
     | A O |
     | D D |
-----+-----+
 BAD |<5>. |
GOOD | 2<3>|
-----+-----+
(row = reference; col = test)

