In [1]:
import pickle
from sklearn.feature_extraction.text import CountVectorizer
import plotly.express as px
import pandas as pd
import re
import plotly.io as pio
pio.renderers.default = 'notebook'

In [2]:
cEXT = pickle.load( open( "data/models/cEXT.p", "rb"))
cNEU = pickle.load( open( "data/models/cNEU.p", "rb"))
cAGR = pickle.load( open( "data/models/cAGR.p", "rb"))
cCON = pickle.load( open( "data/models/cCON.p", "rb"))
cOPN = pickle.load( open( "data/models/cOPN.p", "rb"))
vectorizer_31 = pickle.load( open( "data/models/vectorizer_31.p", "rb"))
vectorizer_30 = pickle.load( open( "data/models/vectorizer_30.p", "rb"))

In [3]:
# def predict_personality(text):
#     scentences = re.split("(?<=[.!?]) +", text)
#     text_vector_31 = vectorizer_31.transform(scentences)
#     text_vector_30 = vectorizer_30.transform(scentences)
#     EXT = cEXT.predict(text_vector_31)
#     NEU = cNEU.predict(text_vector_30)
#     AGR = cAGR.predict(text_vector_31)
#     CON = cCON.predict(text_vector_31)
#     OPN = cOPN.predict(text_vector_31)
#     return [EXT[0], NEU[0], AGR[0], CON[0], OPN[0]]


In [4]:
# 输出为bigefive的置信度
def predict_personality(text):
    # Split text into sentences
    sentences = re.split("(?<=[.!?]) +", Extraversion)
    
    # Transform sentences into feature vectors
    text_vector_31 = vectorizer_31.transform(sentences)
    text_vector_30 = vectorizer_30.transform(sentences)
    
    # Predict probabilities for each personality dimension
    prob_EXT = cEXT.predict_proba(text_vector_31)  # Predict probabilities for EXT
    prob_NEU = cNEU.predict_proba(text_vector_30)  # Predict probabilities for NEU
    prob_AGR = cAGR.predict_proba(text_vector_31)  # Predict probabilities for AGR
    prob_CON = cCON.predict_proba(text_vector_31)  # Predict probabilities for CON
    prob_OPN = cOPN.predict_proba(text_vector_31)  # Predict probabilities for OPN
    
    # Extract the confidence scores (probability of the predicted class)
    confidence_EXT = prob_EXT.max()  # Confidence for EXT
    confidence_NEU = prob_NEU.max()  # Confidence for NEU
    confidence_AGR = prob_AGR.max()  # Confidence for AGR
    confidence_CON = prob_CON.max()  # Confidence for CON
    confidence_OPN = prob_OPN.max()  # Confidence for OPN
    
    return [confidence_EXT, confidence_NEU, confidence_AGR, confidence_CON, confidence_OPN]

In [5]:
text = 'It is important to note that each of the five personality factors represents a range between two extremes. For example, extraversion represents a continuum between extreme extraversion and extreme introversion. In the real world, most people lie somewhere in between the two polar ends of each dimension. These five categories are usually described as follows.'

In [6]:
Extraversion='I feel an overwhelming surge of excitement when surrounded by a crowd; my energy is contagious, and I thrive on the vibrant atmosphere of social gatherings.'

In [7]:
predictions = predict_personality(text)
print("predicted personality:", predictions)
df = pd.DataFrame(dict(r=predictions, theta=['EXT','NEU','AGR', 'CON', 'OPN']))
fig = px.line_polar(df, r='r', theta='theta', line_close=True)
# # fig.show()
# fig.write_image("personality_plot.png", format="png")

predicted personality: [0.5636255180008869, 0.58, 0.6629771396018606, 0.554767820751249, 0.7000641385621587]
