# **Classifier #1**
Train: IMDB   
Test: Amazon

Atulya Shetty and Payton Walker

## **Build Classifier:**

In [0]:
pip install flair # must install flair package if first execution

In [0]:
from flair.data import Sentence
from flair.models import TextClassifier

# create sentiment classifier (provided by Flair)
classifier = TextClassifier.load('en-sentiment') #English-sentiment model pre-trained with IMDB movie reviews.

## **Import Test Data:**

In [0]:
import pandas as pd
import numpy as np

# Read in Amazon test data
input_data = pd.read_csv('data/S1/test.csv',low_memory=False)

# Extract text column
test_attr = input_data[['reviewText']]

# Extract label column
test_label = input_data[['overall']]

In [0]:
# Extract text and label values
X = test_attr.values
Y = test_label.values

# Prepare list of TRUE labels
actual = np.array(test_label)
#actual

## **Make Predictions:**

In [0]:
from flair.data import Sentence

predicted = []
pred = []

# make predictions for each review in test set and save in predictions array
for review in X:
  s = Sentence(review[0])
  p = classifier.predict(s)
  predicted.append(s.labels[0].value)

In [0]:
# map 0, 1 values to POS, NEG predictions
pred = list(map(lambda x: 1 if x == "POSITIVE" else 0, predicted))

pred = np.array(pred)
pred

array([0, 0, 0, ..., 1, 0, 0])

## **Performance Evaluation:**

In [0]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# generate and print performance statistics
accuracy = accuracy_score(actual, pred) * 100
precision = precision_score(actual, pred) * 100
recall = recall_score(actual, pred) * 100
f1 = f1_score(actual, pred)

print ('Accuracy is {:.4f}'.format(accuracy))
print('Precision is {:.4f}'.format(precision))
print('Recall is {:.4f}'.format(recall))
print('F1 Score is {:.4f}'.format(f1))

Accuracy is 67.9600
Precision is 69.6156
Recall is 63.7400
F1 Score is 0.6655
