In [None]:
import pickle
from nltk.classify import ClassifierI
from statistics import mode
import pandas as pd
import numpy as np
import re
import string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer

# English stopwords from NLTK, materialised once at import time as a set so
# the per-token membership test in preprocess_tweet_text is a hash lookup.
# NOTE(review): presumably requires the NLTK "stopwords" corpus to be
# downloaded beforehand — confirm for the deployment environment.
stop_words = set(stopwords.words('english'))

def load_dataset(filename, cols):
    """Read a latin-1 encoded CSV with pandas and relabel its columns.

    Args:
        filename: Path or file-like object handed to ``pandas.read_csv``.
        cols: Column labels assigned to the loaded frame, replacing the
            labels ``read_csv`` produced.

    Returns:
        The loaded ``DataFrame`` carrying ``cols`` as its column labels.
    """
    # NOTE(review): read_csv is called with its default header handling, so
    # the first line of the file is consumed as a header before being
    # overwritten below — confirm the input really has a header row.
    frame = pd.read_csv(filename, encoding='latin-1')
    frame.columns = cols
    return frame

def remove_unwanted_cols(dataset, cols):
    """Delete the named columns from ``dataset`` in place.

    Args:
        dataset: Mutable mapping-like object supporting ``del dataset[key]``
            (a ``DataFrame`` in this script).
        cols: Iterable of column labels to delete, processed in order.

    Returns:
        The same ``dataset`` object, after the deletions.

    Raises:
        KeyError: If a label is not present; labels listed before it have
            already been deleted at that point.
    """
    for unwanted in cols:
        del dataset[unwanted]
    return dataset


def preprocess_tweet_text(tweet):
    """Normalise a raw tweet into a space-joined string of content words.

    Steps, in order: lowercase, strip URLs, strip ``@user`` mentions and
    ``#`` signs (the hashtag word itself is kept), strip ASCII punctuation,
    tokenise with NLTK's ``word_tokenize`` and drop English stopwords.

    Args:
        tweet: Raw tweet text.

    Returns:
        The surviving tokens joined by single spaces (may be empty).
    """
    # str.lower() returns a new string; the result has to be re-bound or the
    # call is a no-op and capitalised stopwords ("The", "And") slip past the
    # lowercase stopword set below.
    # NOTE(review): if the pickled vectorizer/models were fitted on text from
    # the previous non-lowercasing version, confirm they do not need
    # re-training after this fix.
    tweet = tweet.lower()
    # Remove urls
    tweet = re.sub(r"http\S+|www\S+|https\S+", '', tweet, flags=re.MULTILINE)
    # Remove user @ references and '#' from tweet
    tweet = re.sub(r'\@\w+|\#','', tweet)
    # Remove punctuations
    tweet = tweet.translate(str.maketrans('', '', string.punctuation))
    # Remove stopwords
    tweet_tokens = word_tokenize(tweet)
    filtered_words = [w for w in tweet_tokens if w not in stop_words]

    return " ".join(filtered_words)


def get_feature_vector(train_fit):
    """Fit a TF-IDF vectorizer on ``train_fit`` and return it.

    The vectorizer is created with ``sublinear_tf=True``; every other
    setting is left at the library default.

    Args:
        train_fit: Training documents passed straight to
            ``TfidfVectorizer.fit``.

    Returns:
        The fitted ``TfidfVectorizer``.
    """
    # fit() returns the estimator itself, so the calls can be chained.
    return TfidfVectorizer(sublinear_tf=True).fit(train_fit)


def int_to_string(sentiment):
    """Translate a numeric sentiment code into its display label.

    Args:
        sentiment: Numeric code; ``0`` and ``2`` are recognised explicitly.

    Returns:
        ``"Negative"`` for ``0``, ``"Neutral"`` for ``2`` and ``"Positive"``
        for every other value.
    """
    if sentiment == 0:
        return "Negative"
    if sentiment == 2:
        return "Neutral"
    # Anything that is neither 0 nor 2 is reported as positive.
    return "Positive"
    
def _load_pickle(path):
    """Return the object unpickled from the binary file at ``path``.

    The file is opened in a ``with`` block so the handle is closed even when
    unpickling raises.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load artefacts that come from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Fitted vectorizer and classifiers, loaded once at import time. The paths
# are relative, so they resolve against the current working directory.
tf_vector = _load_pickle("models/tf_vector.pickle")
NB_model = _load_pickle("models/NB_model.pickle")
BNB_model = _load_pickle("models/BNB_model.pickle")
LR_model = _load_pickle("models/LR_model.pickle")
LSVC_model = _load_pickle("models/LSVC_model.pickle")


class VoteClassifier(ClassifierI):
    """Majority-vote ensemble over already-fitted classifiers.

    Every wrapped classifier must expose ``predict(features)`` returning an
    indexable result. A first element equal to ``0`` counts as a ``'neg'``
    vote; any other value counts as ``'pos'``.

    With an even number of classifiers a tie is possible. The winner is
    whatever ``statistics.mode`` returns for the vote list: on Python 3.8+
    that is the first label encountered (i.e. the earliest classifier's
    vote), while older versions raise ``StatisticsError`` on a tie.
    """

    def __init__(self, *classifiers):
        """Store the classifiers that take part in the vote, in order."""
        self._classifiers = classifiers

    def _collect_votes(self, text):
        """Return one ``'neg'``/``'pos'`` vote per classifier for ``text``.

        The text is cleaned with ``preprocess_tweet_text`` and vectorised
        with the module-level ``tf_vector`` once, then shown to every
        classifier in construction order.
        """
        cleaned = preprocess_tweet_text(text)
        # transform() takes an iterable of documents; wrap the single one.
        features = tf_vector.transform([cleaned])
        return [
            'neg' if clf.predict(features)[0] == 0 else 'pos'
            for clf in self._classifiers
        ]

    def predict(self, text):
        """Return the majority label (``'neg'`` or ``'pos'``) for ``text``."""
        return mode(self._collect_votes(text))

    def confidence(self, text):
        """Return the fraction of classifiers that voted for the winner."""
        votes = self._collect_votes(text)
        winning_votes = votes.count(mode(votes))
        return winning_votes / len(votes)


# Module-level ensemble used by sentiment(). Votes are collected in the
# order the models are listed here.
voted_classifier = VoteClassifier(NB_model, LR_model, BNB_model, LSVC_model)

def sentiment(text):
    """Classify ``text`` with the module-level ``voted_classifier``.

    Args:
        text: Raw text to classify.

    Returns:
        A ``(label, confidence)`` tuple, where ``label`` is the result of
        ``voted_classifier.predict`` and ``confidence`` the result of
        ``voted_classifier.confidence`` for the same text.
    """
    label = voted_classifier.predict(text)
    agreement = voted_classifier.confidence(text)
    return label, agreement

