In [1]:
import pandas as pd
import joblib
from joblib import load
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
import re

from sklearn.naive_bayes import BernoulliNB #naive bayes
from sklearn import tree #decision tree
from sklearn import neural_network

In [2]:
# Load the training data from the CSV in the working directory; later cells read
# its 'data' and 'label' columns.
TRAINING_CSV = 'training_data.csv'
df = pd.read_csv(TRAINING_CSV)

In [None]:
# Summary statistics of the loaded frame; the bare last expression is what the
# cell displays.
df_summary = df.describe()
df_summary

In [3]:
# Discard every row that has a missing value in any column, then pull the model
# inputs ('data') and targets ('label') out as arrays.
df = df.dropna(axis=0, how='any')
x, y = df['data'].values, df['label'].values


In [4]:
# Hold out 20% of the rows for evaluation. The split is stratified on the label
# and seeded, so it is the same on every run.
split_parts = train_test_split(
    x,
    y,
    train_size=0.8,
    stratify=y,
    random_state=100,
)
x_train, x_test, y_train, y_test = split_parts

In [None]:
# Naive Bayes: TF-IDF features (unigrams + bigrams, capped at 1150 terms) feeding
# a Bernoulli naive Bayes classifier.
nb_vectorizer = TfidfVectorizer(
    strip_accents='unicode',
    ngram_range=(1, 2),
    min_df=2,  # ignore terms that appear in fewer than two documents
    max_features=1150,
    norm='l2',
    # stop_words is left at its default, so no English stop-word list is applied.
)
nb_classifier = BernoulliNB(alpha=0.001)
bernoulli_nb_pipeline = make_pipeline(nb_vectorizer, nb_classifier)
bernoulli_nb_pipeline.fit(x_train, y_train)

# Evaluate on the held-out split.
y_test_pred = bernoulli_nb_pipeline.predict(x_test)
nb_report = classification_report(y_test, y_test_pred)
print(nb_report)

In [7]:
# Neural network: TF-IDF features (unigrams + bigrams, capped at 700 terms)
# feeding a multi-layer perceptron with three hidden layers (25, 10, 5 units).
neural_network_pipeline = make_pipeline(
    TfidfVectorizer(
        min_df=2,  # ignore terms that appear in fewer than two documents
        ngram_range=(1, 2),
        max_features=700,
        strip_accents='unicode',
        norm='l2',
    ),
    neural_network.MLPClassifier(
        solver='adam',
        hidden_layer_sizes=(25, 10, 5),
        activation='logistic',
        max_iter=2000,
        # Weight initialisation and adam's batch sampling are random. Pin the seed
        # (same value as the train/test split) so a Restart & Run All trains the
        # same model and prints the same report.
        random_state=100,
    ),
)
neural_network_pipeline.fit(x_train, y_train)

# Evaluate on the held-out split.
y_test_pred = neural_network_pipeline.predict(x_test)
print(classification_report(y_test, y_test_pred))

              precision    recall  f1-score   support

         1.0       0.90      0.90      0.90        20
         2.0       0.96      0.96      0.96        48

    accuracy                           0.94        68
   macro avg       0.93      0.93      0.93        68
weighted avg       0.94      0.94      0.94        68



In [8]:
# Refit the neural-network pipeline on all rows (train and test together), write
# it to disk, and read it straight back into `clf`.
# NOTE: the naive Bayes cell further down dumps to this same file name, so
# whichever of the two dump cells runs last decides what the file contains.
MODEL_FILE = "clf.joblib"
neural_network_pipeline.fit(x, y)
joblib.dump(neural_network_pipeline, MODEL_FILE)
clf = joblib.load(MODEL_FILE)

In [None]:
# Decision tree: TF-IDF features (unigrams + bigrams, English stop words removed,
# capped at 1150 terms) feeding a decision-tree classifier.
decision_tree_pipeline = make_pipeline(
    TfidfVectorizer(
        min_df=1,
        ngram_range=(1, 2),
        stop_words='english',
        max_features=1150,
        strip_accents='unicode',
        norm='l2',
    ),
    # scikit-learn permutes the candidate features at every split, so an unseeded
    # tree can differ between runs. Pin the seed (same value as the train/test
    # split) to make the fitted tree and its report reproducible.
    tree.DecisionTreeClassifier(random_state=100),
)
decision_tree_pipeline.fit(x_train, y_train)

# Evaluate on the held-out split.
y_test_pred = decision_tree_pipeline.predict(x_test)
print(classification_report(y_test, y_test_pred))

In [None]:
# Refit the naive Bayes pipeline on all rows (train and test together). fit()
# returns the pipeline, which is what this cell displays.
bernoulli_nb_pipeline.fit(X=x, y=y)

In [None]:
# Persist the naive Bayes pipeline. dump() returns the list of files written,
# which is what this cell displays.
joblib.dump(value=bernoulli_nb_pipeline, filename="clf.joblib")

In [None]:
# Read the persisted pipeline back from disk for use as `clf`.
clf = joblib.load(filename='clf.joblib')

In [None]:
# One example message, stored under a 'text' key.
data_dict = dict(
    text="Service cancellation . Good afternoon . I&#39;m the new operator at Casa Mezcal . I would like to cancel our membership/service with Bevspot . We no longer use this app. Please confirm when it is done. I will block any"
)

# predict() is given a list of documents, so wrap the single string in a list;
# the cell displays the prediction for that one document.
text = [data_dict['text']]
predictions = clf.predict(text)
predictions[0]

In [None]:
# Scratch check that an int and the equal-valued float compare equal in Python.
# Presumably here because the labels appear as 1.0 / 2.0 in the report; confirm
# before relying on it.
1.0 == 1