# Myers-Briggs Type Indicator demo

Instructions: Execute the two cells below to load the functions, then enter text in the space provided to estimate the MBTI personality type.

**Note: This notebook requires spaCy (with the `en_core_web_sm` model), NumPy, Matplotlib and IPython widgets to be installed**

In [None]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
import re
import pickle
import numpy as np
from ipywidgets import widgets, interact

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use("ggplot")

# Load spaCy's small English pipeline; tokeniser() below calls it on each post.
# If the model is not installed yet, download it first with:
#     python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm")


# Lower-cased MBTI type names (and "mbti") plus their plural forms. Built once
# at import time as a frozenset so the per-token membership test is O(1).
_MBTI_TOKENS = frozenset(
    form
    for name in ('INFJ', 'ENTP', 'INTP', 'INTJ', 'ENTJ', 'ENFJ', 'INFP', 'ENFP',
                 'ISFP', 'ISTP', 'ISFJ', 'ISTJ', 'ESTP', 'ESFP', 'ESTJ', 'ESFJ',
                 'MBTI')
    for form in (name.lower(), name.lower() + 's')
)


def tokeniser(sentence):
    """Clean a raw post and return the lemmas of its content words.

    Steps, in order:
      1. replace every '[', ']' and '|' with a space (this removes the '|||'
         post separator of the kaggle dataset);
      2. delete subreddit references of the form '/r/<name>';
      3. run the spaCy pipeline ``nlp`` and drop stop words, whitespace,
         punctuation, number-like, URL-like and e-mail-like tokens, and any
         MBTI type name (singular or plural);
      4. lemmatise the survivors, dropping lemmas that are stop words or are
         a single character long.

    Parameters
    ----------
    sentence : str
        Raw text to tokenise.

    Returns
    -------
    list of str
        Lemmas of the retained tokens, in document order.
    """
    # Same character set as the former "[]|||[]" pattern (']', '|', '['), but
    # written with escapes in a raw string so `re` does not emit a
    # "possible set union" FutureWarning for the doubled '|'.
    sentence = re.sub(r"[\[\]|]", " ", sentence)

    # Remove the whole subreddit reference. The previous pattern had no
    # quantifier and only removed "/r/" plus ONE character, leaving the rest
    # of the name behind (e.g. "/r/python" -> "ython").
    # NOTE(review): if the pickled vectoriser was fit with the old pattern,
    # those leftover fragments may be in its vocabulary -- confirm/retrain.
    sentence = re.sub(r"/r/[0-9A-Za-z_]+", "", sentence)

    kept = [
        tok for tok in nlp(sentence)
        if tok.lower_ not in STOP_WORDS
        and not (tok.is_space or tok.is_punct or tok.like_num
                 or tok.like_url or tok.like_email)
        and tok.lower_ not in _MBTI_TOKENS
    ]

    # Lemmatise; a lemma can itself be a stop word even when the surface
    # form was not, so filter again after lemmatisation.
    lemmas = [tok.lemma_ for tok in kept if tok.lemma_ not in STOP_WORDS]
    return [lemma for lemma in lemmas if len(lemma) > 1]

def dummy_fn(x):
    """Return *x* unchanged (identity function).

    Defined with ``def`` rather than as a lambda bound to a name so that it
    carries a real ``__name__`` and can be pickled/unpickled by reference.
    NOTE(review): presumably the pickled vectoriser loaded below refers to
    this function by name (e.g. as a pass-through tokenizer) -- confirm
    against the training code before renaming or removing it.
    """
    return x


def _load_pickle(path):
    """Open *path* in binary mode and return the unpickled object."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source.

# Feature pipeline used by eval_string: `cv.transform` followed by
# `idf_transformer.transform`.
cv = _load_pickle('./pickle files/cv.pickle')
idf_transformer = _load_pickle('./pickle files/idf_transformer.pickle')

# One classifier per MBTI axis (I/E, J/P, N/S, T/F).
lr_ie = _load_pickle('./pickle files/LR_clf_IE_kaggle.pickle')
lr_jp = _load_pickle('./pickle files/LR_clf_JP_kaggle.pickle')
lr_ns = _load_pickle('./pickle files/LR_clf_NS_kaggle.pickle')
lr_tf = _load_pickle('./pickle files/LR_clf_TF_kaggle.pickle')


def eval_string(my_post, show_graph=False):
    """Print (and optionally plot) the MBTI axis probabilities for a text.

    The text is tokenised with ``tokeniser``, vectorised with ``cv`` and
    ``idf_transformer``, and passed to the four per-axis classifiers. For
    each axis one line is printed: the probability from column 1 of
    ``predict_proba`` on the left (under the first-named pole) and the
    probability from column 0 on the right.
    NOTE(review): this assumes class 1 of each classifier is the
    first-named pole (I, N, T, J) -- confirm against the training labels.

    Parameters
    ----------
    my_post : str
        Text to classify.
    show_graph : bool, optional
        When true, also draw a horizontal bar chart of the column-1
        probabilities for the four axes.

    Returns
    -------
    None
    """
    features = idf_transformer.transform(cv.transform([tokeniser(my_post)]))

    # (printed name, bar-chart label, classifier) for each MBTI axis.
    axes = [
        ("Introversion - Extroversion", "Introversion (I)", lr_ie),
        ("Intuiting - Sensing", "Intuiting (N)", lr_ns),
        ("Thinking - Feeling", "Thinking (T)", lr_tf),
        ("Judging - Perceiving", "Judging (J)", lr_jp),
    ]

    # One row per axis; a single input sample gives one probability per class.
    probs = np.vstack(
        [clf.predict_proba(features).flatten() for _, _, clf in axes]
    )

    for (name, _, _), row in zip(axes, probs):
        print(f"{name:28s}: {row[1]:.3f} - {row[0]:.3f}")

    if show_graph:
        fig, ax = plt.subplots(figsize=(6, 6))

        labels = [label for _, label, _ in axes]
        # Full-width background bars first, then the probabilities on top.
        ax.barh(labels, [1, 1, 1, 1])
        ax.barh(labels, probs[:, 1])

        ax.set_xlim([0, 1])
        ax.set_xlabel("Propensity")

        # pyplot.show() accepts only the keyword argument `block`; passing
        # the figure positionally (as before) is misread or rejected
        # depending on the backend.
        plt.show()

# Type in some text

In [None]:
# Multi-line text box whose contents are passed to eval_string as `my_post`.
post_input = widgets.Textarea(
    value='',
    placeholder='Enter in some text',
    description='Input:',
    disabled=False,
)

# Wire the text box to eval_string so it is called with the box's contents.
interact(eval_string, my_post=post_input)
