In [1]:
import pandas as pd
import joblib
from joblib import load
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
import re

from sklearn.naive_bayes import BernoulliNB #naive bayes
from sklearn import tree #decision tree
from sklearn import neural_network

In [5]:
# Load the labelled training set; later cells read its 'data' and 'label' columns.
df = pd.read_csv('training_data.csv')

In [3]:
# Summary statistics for the numeric columns of the raw frame.
df.describe()

Unnamed: 0,1
count,294.0
mean,1.673469
std,0.469743
min,1.0
25%,1.0
50%,2.0
75%,2.0
max,2.0


In [6]:
# Discard every row that has a missing value, then pull the 'data' column
# (model input) and the 'label' column (target) out as arrays.
df = df.dropna()
x, y = df['data'].values, df['label'].values


In [10]:
# Display the frame to eyeball the label/data columns.
df

Unnamed: 0,label,data
0,1,"Access- Ryan Hinz Aloha, May we have access fo..."
1,1,Account We spoke with someone a few weeks ago ...
2,1,"Addition to email Hey guys, I need to add chri..."
3,1,"Bevspot Access Aloha, Requesting access for Ve..."
4,1,"BevSpot Account Aloha, We are unable to access..."
...,...,...
290,2,Re: Francisco Fabara has invited you to BevSpo...
291,2,RE: HER FOOD -- How are you supporting Her? Yo...
292,2,Re: Your order for Chevalier Theatre: Beverage...
293,2,Re: Your order for HOBNOB- Halcyon: Beverage h...


In [40]:
# Stratified split: 75% of the rows go to training, the remainder to testing.
# The fixed random_state keeps the partition identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=.75,
    stratify=y,
    random_state=1,
)

In [41]:
# naive bayes
# TF-IDF over unigrams and bigrams (English stop words removed, at most 800
# terms, each term must occur in at least 2 documents) feeding a Bernoulli
# naive Bayes classifier with a very small smoothing value.
nb_vectorizer = TfidfVectorizer(
    strip_accents='unicode',
    stop_words='english',
    ngram_range=(1, 2),
    min_df=2,
    max_features=800,
    norm='l2',
)
nb_classifier = BernoulliNB(alpha=1e-5)
bernoulli_nb_pipeline = make_pipeline(nb_vectorizer, nb_classifier)

# Train on the training split and report per-class metrics on the held-out split.
bernoulli_nb_pipeline.fit(x_train, y_train)
y_test_pred = bernoulli_nb_pipeline.predict(x_test)
print(classification_report(y_test, y_test_pred))

              precision    recall  f1-score   support

           1       0.82      0.75      0.78        24
           2       0.88      0.92      0.90        50

    accuracy                           0.86        74
   macro avg       0.85      0.83      0.84        74
weighted avg       0.86      0.86      0.86        74



In [43]:
# neural network
# TF-IDF over unigrams and bigrams (at most 2000 terms, each term must occur
# in at least 2 documents) feeding a small multi-layer perceptron with three
# hidden layers. Stop-word removal is left disabled for this model.
neural_network_pipeline = make_pipeline(TfidfVectorizer(
        min_df = 2,
        ngram_range = (1, 2),
        #stop_words = 'english',
        max_features = 2000,
        strip_accents = 'unicode',
        norm = 'l2'
        ), neural_network.MLPClassifier(solver='adam',
                                        hidden_layer_sizes=(10,10,5),
                                        activation = 'logistic',
                                        max_iter = 2000,
                                        # Seed weight initialisation and batch sampling so the
                                        # reported metrics are reproducible on a re-run (same
                                        # seed as the train/test split). Metrics may differ
                                        # slightly from earlier unseeded runs.
                                        random_state = 1,
                                       ))
# Train on the training split and report per-class metrics on the held-out split.
neural_network_pipeline.fit(x_train, y_train)
y_test_pred = neural_network_pipeline.predict(x_test)
print(classification_report(y_test, y_test_pred))

              precision    recall  f1-score   support

           1       0.85      0.92      0.88        24
           2       0.96      0.92      0.94        50

    accuracy                           0.92        74
   macro avg       0.90      0.92      0.91        74
weighted avg       0.92      0.92      0.92        74



In [44]:
# Refit the neural-network pipeline on every available row (training and test
# rows together), write it to disk, then read it back as `clf`.
neural_network_pipeline.fit(x, y)
joblib.dump(neural_network_pipeline, "clf.joblib")
clf = joblib.load('clf.joblib')

In [None]:
# decision tree
# TF-IDF over unigrams and bigrams (English stop words removed, at most 1150
# terms, no minimum document frequency) feeding a decision tree classifier.
decision_tree_pipeline = make_pipeline(TfidfVectorizer(
        min_df = 1,
        ngram_range = (1, 2),
        stop_words = 'english',
        max_features = 1150,
        strip_accents = 'unicode',
        norm = 'l2'
        # Seed the tree: features are randomly permuted at each split, so an
        # unseeded tree can give different results on every run. Same seed as
        # the train/test split.
        ), tree.DecisionTreeClassifier(random_state = 1))
# Train on the training split and report per-class metrics on the held-out split.
decision_tree_pipeline.fit(x_train, y_train)
y_test_pred = decision_tree_pipeline.predict(x_test)
print(classification_report(y_test, y_test_pred))

In [None]:
# Refit the naive Bayes pipeline on the full dataset (training and test rows together).
bernoulli_nb_pipeline.fit(x, y)

In [None]:
# Persist the naive Bayes pipeline to disk.
# NOTE(review): this is the same path the neural-network pipeline is dumped to
# earlier in the notebook, so running every cell top-to-bottom leaves the naive
# Bayes model in clf.joblib. Confirm which model is meant to be shipped.
joblib.dump(bernoulli_nb_pipeline, "clf.joblib")

In [None]:
# Reload the persisted pipeline from disk; which model this is depends on
# which cell wrote clf.joblib last.
clf = load('clf.joblib')

In [None]:
# Smoke test: run the reloaded classifier on a single hand-written message.
data_dict = {
    'text': "Service cancellation . Good afternoon . I&#39;m the new operator at Casa Mezcal . I would like to cancel our membership/service with Bevspot . We no longer use this app. Please confirm when it is done. I will block any",
}
# predict() takes a collection of documents, so wrap the one string in a list
# and take the first (only) prediction.
text = [data_dict['text']]
clf.predict(text)[0]

In [None]:
# Scratch check: an int and a float with the same value compare equal.
# NOTE(review): presumably left over from checking predicted labels against
# float values; confirm it is still needed or remove the cell.
1 == 1.0