In [1]:
import pandas as pd
import numpy as np

import sklearn
from sklearn.cross_validation import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn import linear_model
from sklearn import tree
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import VotingClassifier

import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

import scipy
import string

import matplotlib.pyplot as plt
%matplotlib inline

<i><b>Read nonverbal and gender data</b></i>
<p>Peronality scores are divided into 2 levels (high vs. not high).</p>

In [2]:
ngdata = pd.read_csv("nvgd_bi.csv")
ngdata.head()

Unnamed: 0,vlogId,mean.pitch,sd.pitch,mean.conf.pitch,sd.conf.pitch,mean.loc.apeak,sd.loc.apeak,mean.num.apeak,mean.energy,sd.d.energy,time.speaking,num.turns,hogv.entropy,hogv.median,gender,Extr,Agr,Cons,Emot,Open
0,VLOG1,178.15,0.38358,1.2526,0.4544,0.018525,0.38232,4.169,0.061449,0.025597,0.60796,0.44839,7.026606,0.14787,1,1,1,1,1,2
1,VLOG3,239.32,0.36474,1.2205,0.41543,0.027022,0.75389,9.6661,0.002103,0.001229,0.51374,0.50013,4.006787,0.008571,2,1,1,1,2,1
2,VLOG5,173.5,0.47636,1.1678,0.50508,0.021466,0.64251,5.9906,0.003113,0.002611,0.70205,0.31675,7.016616,0.57479,1,2,2,2,2,2
3,VLOG6,201.28,0.27454,1.4996,0.40633,0.0295,1.0196,10.359,0.032137,0.014806,0.75993,0.29976,3.465855,0.008744,1,2,1,1,1,2
4,VLOG7,275.68,0.48758,1.0312,0.42298,0.017109,0.68817,5.0138,0.1286,0.04323,0.60069,0.34916,7.16026,0.285714,1,1,1,1,1,1


In [3]:
X = ngdata.columns[1:15]
ngdata_X = ngdata[X]
y = ngdata.columns[15:20]
ngdata_y = ngdata[y]

In [4]:
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    ngdata_X, ngdata_y, train_size=0.7, random_state=0)

<h3>k-NN</h3>

In [5]:
knn = KNeighborsClassifier()
params ={'n_neighbors':list(range(1,15))}
model = sklearn.grid_search.GridSearchCV(knn, params, cv=10)
model.fit(X_train, y_train["Extr"]);
model.best_params_

{'n_neighbors': 13}

In [6]:
knn_Extr = KNeighborsClassifier(n_neighbors=13)
knn_Extr.fit(X_train, y_train["Extr"]) 
y_hat = knn_Extr.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(knn_Extr,X_test, y_test["Extr"],cv=10)
print("Extraversion:")
print(metrics.classification_report(y_test["Extr"],y_hat))
print("Accuracy:", metrics.accuracy_score(y_test["Extr"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Extr'],y_hat))

Extraversion:
             precision    recall  f1-score   support

          1       0.74      0.85      0.79        86
          2       0.46      0.31      0.37        36

avg / total       0.66      0.69      0.67       122

Accuracy: 0.688524590164
MSE: 0.311475409836


In [7]:
model.fit(X_train, y_train["Agr"]);
model.best_params_

{'n_neighbors': 10}

In [8]:
knn_Agr = KNeighborsClassifier(n_neighbors=10)
knn_Agr.fit(X_train, y_train["Agr"]) 
y_hat = knn_Agr.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(knn_Extr,X_test, y_test["Agr"],cv=10)
print("Agreeableness:")
print(metrics.classification_report(y_test["Agr"],y_hat))
print("Accuracy:", metrics.accuracy_score(y_test["Agr"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Agr'],y_hat))

Agreeableness:
             precision    recall  f1-score   support

          1       0.70      0.88      0.78        86
          2       0.23      0.08      0.12        36

avg / total       0.56      0.65      0.59       122

Accuracy: 0.647540983607
MSE: 0.352459016393


In [9]:
model.fit(X_train, y_train["Cons"]);
model.best_params_

{'n_neighbors': 12}

In [10]:
knn_Cons = KNeighborsClassifier(n_neighbors=12)
knn_Cons.fit(X_train, y_train["Cons"]) 
y_hat = knn_Cons.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(knn_Extr,X_test, y_test["Cons"],cv=10)
print("Conscientiousness:")
print(metrics.classification_report(y_test["Cons"],y_hat))
print("Accuracy:", metrics.accuracy_score(y_test["Cons"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Cons'],y_hat))

Conscientiousness:
             precision    recall  f1-score   support

          1       0.72      0.96      0.82        89
          2       0.00      0.00      0.00        33

avg / total       0.53      0.70      0.60       122

Accuracy: 0.696721311475
MSE: 0.303278688525


In [11]:
model.fit(X_train, y_train["Emot"]);
model.best_params_

{'n_neighbors': 6}

In [12]:
knn_Emot = KNeighborsClassifier(n_neighbors=6)
knn_Emot.fit(X_train, y_train["Emot"]) 
y_hat = knn_Emot.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(knn_Extr,X_test, y_test["Emot"],cv=10)
print("Emotional Stability:")
print(metrics.classification_report(y_test["Emot"],y_hat))
print("Accuracy:", metrics.accuracy_score(y_test["Emot"], y_hat))
print("MSE:",metrics.mean_squared_error(y_test['Emot'],y_hat))

Emotional Stability:
             precision    recall  f1-score   support

          1       0.65      0.84      0.74        81
          2       0.28      0.12      0.17        41

avg / total       0.53      0.60      0.55       122

Accuracy: 0.598360655738
MSE: 0.401639344262


In [13]:
model.fit(X_train, y_train["Open"]);
model.best_params_

{'n_neighbors': 10}

In [14]:
knn_Open = KNeighborsClassifier(n_neighbors=10)
knn_Open.fit(X_train, y_train["Open"]) 
y_hat = knn_Open.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(knn_Extr,X_test, y_test["Open"],cv=10)
print("Openness to Experience:")
print(metrics.classification_report(y_test["Open"],y_hat))
print("Accuracy:", metrics.accuracy_score(y_test["Open"], y_hat))
print("MSE:",metrics.mean_squared_error(y_test['Open'],y_hat))

Openness to Experience:
             precision    recall  f1-score   support

          1       0.74      0.99      0.85        90
          2       0.50      0.03      0.06        32

avg / total       0.68      0.74      0.64       122

Accuracy: 0.737704918033
MSE: 0.262295081967


<h3>SVM</h3>

In [15]:
svm_Extr=sklearn.svm.SVC(probability=True)
svm_Extr.fit(X_train, y_train["Extr"])
y_hat=svm_Extr.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(svm_Extr,X_test, y_test["Extr"],cv=10)
print("Extraversion:")
print(metrics.classification_report(y_test["Extr"],y_hat))
print("Accuracy:",metrics.accuracy_score(y_test["Extr"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Extr'],y_hat))

Extraversion:
             precision    recall  f1-score   support

          1       0.72      0.88      0.80        86
          2       0.41      0.19      0.26        36

avg / total       0.63      0.68      0.64       122

Accuracy: 0.680327868852
MSE: 0.319672131148


In [16]:
svm_Agr=sklearn.svm.SVC(probability=True)
svm_Agr.fit(X_train, y_train["Agr"])
y_hat=svm_Agr.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(svm_Agr,X_test, y_test["Agr"],cv=5)
print("Agreeableness:")
print(metrics.classification_report(y_test["Agr"],y_hat))
print("Accuracy is:",metrics.accuracy_score(y_test["Agr"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Agr'],y_hat))

Agreeableness:
             precision    recall  f1-score   support

          1       0.70      0.86      0.77        86
          2       0.25      0.11      0.15        36

avg / total       0.57      0.64      0.59       122

Accuracy is: 0.639344262295
MSE: 0.360655737705


In [17]:
svm_Cons=sklearn.svm.SVC(probability=True)
svm_Cons.fit(X_train, y_train["Cons"])
y_hat=svm_Cons.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(svm_Cons,X_test, y_test["Cons"],cv=5)
print("Conscientiousness:")
print(metrics.classification_report(y_test["Cons"],y_hat))
print("Accuracy is:",metrics.accuracy_score(y_test["Cons"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Cons'],y_hat))

Conscientiousness:
             precision    recall  f1-score   support

          1       0.73      0.99      0.84        89
          2       0.00      0.00      0.00        33

avg / total       0.53      0.72      0.61       122

Accuracy is: 0.72131147541
MSE: 0.27868852459


In [18]:
svm_Emot=sklearn.svm.SVC(probability=True)
svm_Emot.fit(X_train, y_train["Emot"])
y_hat=svm_Emot.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(svm_Emot,X_test, y_test["Emot"],cv=5)
print("Emotional Stability:")
print(metrics.classification_report(y_test["Emot"],y_hat))
print("Accuracy is:",metrics.accuracy_score(y_test["Emot"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Emot'],y_hat))

Emotional Stability:
             precision    recall  f1-score   support

          1       0.69      0.90      0.78        81
          2       0.50      0.20      0.28        41

avg / total       0.63      0.66      0.61       122

Accuracy is: 0.66393442623
MSE: 0.33606557377


In [19]:
svm_Open=sklearn.svm.SVC(probability=True)
svm_Open.fit(X_train, y_train["Open"])
y_hat=svm_Open.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(svm_Open,X_test, y_test["Open"],cv=5)
print("Openness to Experience:")
print(metrics.classification_report(y_test["Open"],y_hat))
print("Accuracy is:",metrics.accuracy_score(y_test["Open"], y_hat))
print("MSE:",metrics.mean_squared_error(y_test['Open'],y_hat))

Openness to Experience:
             precision    recall  f1-score   support

          1       0.75      1.00      0.86        90
          2       1.00      0.06      0.12        32

avg / total       0.82      0.75      0.66       122

Accuracy is: 0.754098360656
MSE: 0.245901639344


<h3>Decision tree</h3>

In [20]:
tree_Extr = tree.DecisionTreeClassifier()
tree_Extr.fit(X_train, y_train["Extr"])
y_hat=tree_Extr.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(tree_Extr,X_test, y_test["Extr"],cv=10)
print("Extraversion:")
print(metrics.classification_report(y_test["Extr"],y_hat))
print("Accuracy:",metrics.accuracy_score(y_test["Extr"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Extr'],y_hat))

Extraversion:
             precision    recall  f1-score   support

          1       0.69      0.70      0.69        86
          2       0.26      0.25      0.25        36

avg / total       0.56      0.57      0.56       122

Accuracy: 0.565573770492
MSE: 0.434426229508


In [21]:
tree_Agr = tree.DecisionTreeClassifier()
tree_Agr.fit(X_train, y_train["Agr"])
y_hat=tree_Agr.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(tree_Agr,X_test, y_test["Agr"],cv=10)
print("Agreeableness:")
print(metrics.classification_report(y_test["Agr"],y_hat))
print("Accuracy is:",metrics.accuracy_score(y_test["Agr"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Agr'],y_hat))

Agreeableness:
             precision    recall  f1-score   support

          1       0.69      0.65      0.67        86
          2       0.27      0.31      0.29        36

avg / total       0.57      0.55      0.56       122

Accuracy is: 0.549180327869
MSE: 0.450819672131


In [22]:
tree_Cons = tree.DecisionTreeClassifier()
tree_Cons.fit(X_train, y_train["Cons"])
y_hat=tree_Cons.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(tree_Cons,X_test, y_test["Cons"],cv=10)
print("Conscientiousness:")
print(metrics.classification_report(y_test["Cons"],y_hat))
print("Accuracy is:",metrics.accuracy_score(y_test["Cons"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Cons'],y_hat))

Conscientiousness:
             precision    recall  f1-score   support

          1       0.74      0.79      0.76        89
          2       0.30      0.24      0.27        33

avg / total       0.62      0.64      0.63       122

Accuracy is: 0.639344262295
MSE: 0.360655737705


In [23]:
tree_Emot = tree.DecisionTreeClassifier()
tree_Emot.fit(X_train, y_train["Emot"])
y_hat=tree_Emot.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(tree_Emot,X_test, y_test["Emot"],cv=10)
print("Emotional Stability:")
print(metrics.classification_report(y_test["Emot"],y_hat))
print("Accuracy is:",metrics.accuracy_score(y_test["Emot"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Emot'],y_hat))

Emotional Stability:
             precision    recall  f1-score   support

          1       0.62      0.68      0.65        81
          2       0.24      0.20      0.21        41

avg / total       0.49      0.52      0.50       122

Accuracy is: 0.516393442623
MSE: 0.483606557377


In [24]:
tree_Open = tree.DecisionTreeClassifier()
tree_Open.fit(X_train, y_train["Open"])
y_hat=tree_Open.predict(X_test)
scores = sklearn.cross_validation.cross_val_score(tree_Open,X_test, y_test["Open"],cv=10)
print("Openness to Experience:")
print(metrics.classification_report(y_test["Open"],y_hat))
print("Accuracy is:",metrics.accuracy_score(y_test["Open"], y_hat))
print("MSE:",metrics.mean_squared_error(y_test['Open'],y_hat))

Openness to Experience:
             precision    recall  f1-score   support

          1       0.73      0.66      0.69        90
          2       0.24      0.31      0.27        32

avg / total       0.60      0.57      0.58       122

Accuracy is: 0.565573770492
MSE: 0.434426229508


<i><b>Read verbal data</b></i>

In [25]:
trans_all=[]
filename=("VLOG1.txt","VLOG3.txt","VLOG5.txt","VLOG6.txt","VLOG7.txt","VLOG8.txt","VLOG9.txt","VLOG10.txt","VLOG11.txt","VLOG12.txt","VLOG13.txt","VLOG14.txt","VLOG15.txt","VLOG16.txt","VLOG17.txt","VLOG18.txt","VLOG19.txt","VLOG21.txt","VLOG22.txt","VLOG23.txt","VLOG24.txt","VLOG25.txt","VLOG26.txt","VLOG27.txt","VLOG28.txt","VLOG29.txt","VLOG30.txt","VLOG31.txt","VLOG32.txt","VLOG33.txt","VLOG35.txt","VLOG36.txt","VLOG37.txt","VLOG38.txt","VLOG39.txt","VLOG42.txt","VLOG43.txt","VLOG44.txt","VLOG45.txt","VLOG46.txt","VLOG47.txt","VLOG48.txt","VLOG49.txt","VLOG50.txt","VLOG51.txt","VLOG52.txt","VLOG53.txt","VLOG54.txt","VLOG55.txt","VLOG56.txt","VLOG57.txt","VLOG58.txt","VLOG59.txt","VLOG60.txt","VLOG61.txt","VLOG62.txt","VLOG63.txt","VLOG64.txt","VLOG65.txt","VLOG66.txt","VLOG67.txt","VLOG68.txt","VLOG69.txt","VLOG70.txt","VLOG71.txt","VLOG72.txt","VLOG73.txt","VLOG74.txt","VLOG75.txt","VLOG76.txt","VLOG77.txt","VLOG78.txt","VLOG79.txt","VLOG80.txt","VLOG81.txt","VLOG83.txt","VLOG84.txt","VLOG86.txt","VLOG87.txt","VLOG88.txt","VLOG89.txt","VLOG90.txt","VLOG91.txt","VLOG92.txt","VLOG93.txt","VLOG94.txt","VLOG96.txt","VLOG97.txt","VLOG98.txt","VLOG99.txt","VLOG100.txt","VLOG102.txt","VLOG103.txt","VLOG104.txt","VLOG105.txt","VLOG106.txt","VLOG107.txt","VLOG108.txt","VLOG109.txt","VLOG110.txt","VLOG111.txt","VLOG112.txt","VLOG113.txt","VLOG114.txt","VLOG116.txt","VLOG117.txt","VLOG118.txt","VLOG119.txt","VLOG120.txt","VLOG122.txt","VLOG123.txt","VLOG124.txt","VLOG125.txt","VLOG126.txt","VLOG127.txt","VLOG129.txt","VLOG130.txt","VLOG131.txt","VLOG132.txt","VLOG133.txt","VLOG134.txt","VLOG135.txt","VLOG136.txt","VLOG137.txt","VLOG138.txt","VLOG140.txt","VLOG141.txt","VLOG142.txt","VLOG143.txt","VLOG144.txt","VLOG145.txt","VLOG146.txt","VLOG147.txt","VLOG148.txt","VLOG149.txt","VLOG150.txt","VLOG151.txt","VLOG152.txt","VLOG153.txt","VLOG154.txt","VLOG155.txt","VLOG156.txt","VLOG158.txt","VLOG159.txt","VLOG160.txt","VLOG161.txt","VLOG162.txt","VLOG163.txt","VLOG164.txt","VLOG165.txt","VLOG166.txt","VLOG167.txt","VLOG168.txt","VLOG169.txt","VLOG170.txt","VLOG171.txt","VLOG172.txt","VLOG173.txt","VLOG174.txt","VLOG175.txt","VLOG176.txt","VLOG178.txt","VLOG179.txt","VLOG180.txt","VLOG181.txt","VLOG182.txt","VLOG183.txt","VLOG184.txt","VLOG186.txt","VLOG187.txt","VLOG189.txt","VLOG190.txt","VLOG191.txt","VLOG192.txt","VLOG193.txt","VLOG194.txt","VLOG195.txt","VLOG196.txt","VLOG197.txt","VLOG198.txt","VLOG199.txt","VLOG201.txt","VLOG202.txt","VLOG204.txt","VLOG205.txt","VLOG207.txt","VLOG209.txt","VLOG210.txt","VLOG212.txt","VLOG213.txt","VLOG214.txt","VLOG216.txt","VLOG217.txt","VLOG218.txt","VLOG219.txt","VLOG220.txt","VLOG221.txt","VLOG222.txt","VLOG223.txt","VLOG224.txt","VLOG225.txt","VLOG226.txt","VLOG227.txt","VLOG228.txt","VLOG229.txt","VLOG232.txt","VLOG233.txt","VLOG234.txt","VLOG235.txt","VLOG236.txt","VLOG237.txt","VLOG239.txt","VLOG241.txt","VLOG242.txt","VLOG243.txt","VLOG244.txt","VLOG246.txt","VLOG247.txt","VLOG248.txt","VLOG249.txt","VLOG250.txt","VLOG251.txt","VLOG252.txt","VLOG253.txt","VLOG254.txt","VLOG255.txt","VLOG256.txt","VLOG257.txt","VLOG259.txt","VLOG260.txt","VLOG261.txt","VLOG262.txt","VLOG263.txt","VLOG264.txt","VLOG265.txt","VLOG266.txt","VLOG267.txt","VLOG268.txt","VLOG269.txt","VLOG270.txt","VLOG271.txt","VLOG272.txt","VLOG273.txt","VLOG274.txt","VLOG275.txt","VLOG276.txt","VLOG277.txt","VLOG278.txt","VLOG279.txt","VLOG280.txt","VLOG281.txt","VLOG282.txt","VLOG283.txt","VLOG284.txt","VLOG285.txt","VLOG286.txt","VLOG287.txt","VLOG288.txt","VLOG289.txt","VLOG290.txt","VLOG291.txt","VLOG292.txt","VLOG293.txt","VLOG294.txt","VLOG295.txt","VLOG296.txt","VLOG297.txt","VLOG298.txt","VLOG299.txt","VLOG300.txt","VLOG301.txt","VLOG302.txt","VLOG303.txt","VLOG304.txt","VLOG305.txt","VLOG306.txt","VLOG307.txt","VLOG308.txt","VLOG309.txt","VLOG310.txt","VLOG311.txt","VLOG312.txt","VLOG313.txt","VLOG314.txt","VLOG315.txt","VLOG317.txt","VLOG318.txt","VLOG319.txt","VLOG320.txt","VLOG321.txt","VLOG322.txt","VLOG323.txt","VLOG324.txt","VLOG325.txt","VLOG326.txt","VLOG327.txt","VLOG328.txt","VLOG329.txt","VLOG330.txt","VLOG331.txt","VLOG332.txt","VLOG333.txt","VLOG334.txt","VLOG335.txt","VLOG336.txt","VLOG337.txt","VLOG338.txt","VLOG339.txt","VLOG340.txt","VLOG341.txt","VLOG343.txt","VLOG344.txt","VLOG345.txt","VLOG346.txt","VLOG347.txt","VLOG348.txt","VLOG350.txt","VLOG351.txt","VLOG352.txt","VLOG353.txt","VLOG355.txt","VLOG357.txt","VLOG358.txt","VLOG359.txt","VLOG360.txt","VLOG361.txt","VLOG363.txt","VLOG364.txt","VLOG365.txt","VLOG366.txt","VLOG367.txt","VLOG368.txt","VLOG369.txt","VLOG370.txt","VLOG371.txt","VLOG372.txt","VLOG373.txt","VLOG374.txt","VLOG375.txt","VLOG376.txt","VLOG377.txt","VLOG378.txt","VLOG379.txt","VLOG380.txt","VLOG381.txt","VLOG382.txt","VLOG384.txt","VLOG385.txt","VLOG386.txt","VLOG387.txt","VLOG388.txt","VLOG389.txt","VLOG390.txt","VLOG391.txt","VLOG392.txt","VLOG393.txt","VLOG394.txt","VLOG395.txt","VLOG396.txt","VLOG397.txt","VLOG398.txt","VLOG399.txt","VLOG400.txt","VLOG401.txt","VLOG402.txt","VLOG403.txt","VLOG404.txt","VLOG405.txt","VLOG406.txt","VLOG407.txt","VLOG408.txt","VLOG409.txt","VLOG410.txt","VLOG411.txt","VLOG412.txt","VLOG413.txt","VLOG414.txt","VLOG415.txt","VLOG416.txt","VLOG417.txt","VLOG418.txt","VLOG419.txt","VLOG420.txt","VLOG421.txt","VLOG422.txt","VLOG423.txt","VLOG424.txt","VLOG425.txt","VLOG426.txt","VLOG427.txt","VLOG428.txt","VLOG430.txt","VLOG431.txt","VLOG432.txt","VLOG433.txt","VLOG434.txt","VLOG435.txt","VLOG436.txt","VLOG437.txt","VLOG438.txt","VLOG439.txt","VLOG440.txt","VLOG441.txt","VLOG442.txt"
)
for file in filename:
    trans = open(file).read()
    trans_all.append(trans)
len(trans_all)

404

In [26]:
X_verbtrain, X_verbtest, y_verbtrain, y_verbtest = sklearn.cross_validation.train_test_split(
    trans_all, ngdata_y, train_size=0.7, random_state=0)

<h3>Verbal-NB</h3>

In [27]:
vectorizer = CountVectorizer(stop_words="english")
# fit the vectorizer to the training documents
X_train_vec = vectorizer.fit_transform(X_verbtrain)
X_test_vec = vectorizer.transform(X_verbtest)

In [28]:
model_Extr = MultinomialNB()
model_Extr.fit(X_train_vec, y_verbtrain["Extr"])
y_hat = model_Extr.predict(X_test_vec)
print("Extraversion: ", metrics.classification_report(y_verbtest["Extr"], y_hat))
print("accuracy is: ", metrics.accuracy_score(y_verbtest["Extr"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Extr'],y_hat))

Extraversion:               precision    recall  f1-score   support

          1       0.74      0.81      0.77        86
          2       0.41      0.31      0.35        36

avg / total       0.64      0.66      0.65       122

accuracy is:  0.66393442623
MSE: 0.33606557377


In [29]:
model_Agr = MultinomialNB()
model_Agr.fit(X_train_vec, y_train["Agr"])
y_hat = model_Agr.predict(X_test_vec)
print("classification: ", metrics.classification_report(y_verbtest["Agr"], y_hat))
print("accuracy is: ", metrics.accuracy_score(y_verbtest["Agr"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Agr'],y_hat))

classification:               precision    recall  f1-score   support

          1       0.72      0.94      0.82        86
          2       0.50      0.14      0.22        36

avg / total       0.66      0.70      0.64       122

accuracy is:  0.704918032787
MSE: 0.295081967213


In [30]:
model_Cons = MultinomialNB()
model_Cons.fit(X_train_vec, y_train["Cons"])
y_hat = model_Cons.predict(X_test_vec)
print("classification: ", metrics.classification_report(y_verbtest["Cons"], y_hat))
print("accuracy is: ", metrics.accuracy_score(y_verbtest["Cons"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Cons'],y_hat))

classification:               precision    recall  f1-score   support

          1       0.75      0.96      0.84        89
          2       0.50      0.12      0.20        33

avg / total       0.68      0.73      0.66       122

accuracy is:  0.729508196721
MSE: 0.270491803279


In [31]:
model_Emot = MultinomialNB()
model_Emot.fit(X_train_vec, y_train["Emot"])
y_hat = model_Emot.predict(X_test_vec)
print("classification: ", metrics.classification_report(y_verbtest["Emot"], y_hat))
print("accuracy is: ", metrics.accuracy_score(y_verbtest["Emot"], y_hat))
print("MSE:", metrics.mean_squared_error(y_test['Emot'],y_hat))

classification:               precision    recall  f1-score   support

          1       0.67      0.86      0.75        81
          2       0.35      0.15      0.21        41

avg / total       0.56      0.62      0.57       122

accuracy is:  0.622950819672
MSE: 0.377049180328


In [32]:
model_Open = MultinomialNB()
model_Open.fit(X_train_vec, y_train["Open"])
y_hat = model_Open.predict(X_test_vec)
print("classification: ", metrics.classification_report(y_verbtest["Open"], y_hat))
print("accuracy is: ", metrics.accuracy_score(y_verbtest["Open"], y_hat))
print("MSE:",metrics.mean_squared_error(y_test['Open'],y_hat))

classification:               precision    recall  f1-score   support

          1       0.74      0.92      0.82        90
          2       0.30      0.09      0.14        32

avg / total       0.63      0.70      0.64       122

accuracy is:  0.704918032787
MSE: 0.295081967213


<h3>Voting Classifier</h3>

In [33]:
import scipy

In [34]:
ensemble_Extr = VotingClassifier(estimators=[('dt', tree_Extr), ('knn', knn_Extr), ('svm', svm_Extr),('nb',model_Extr)],
                                 voting='soft', weights=[1,1,1,1])
X_comtrain = scipy.sparse.hstack((X_train_vec, X_train))
X_comtest=scipy.sparse.hstack((X_test_vec,X_test))
ensemble_Extr.fit(X_comtrain,y_train["Extr"])
y_hat = ensemble_Extr.predict(X_comtest)
print("Extraversion: ", metrics.classification_report(y_test["Extr"], y_hat))
print("accuracy is: ", metrics.accuracy_score(y_test["Extr"], y_hat))
print("MSE:",metrics.mean_squared_error(y_test['Extr'],y_hat))

Extraversion:               precision    recall  f1-score   support

          1       0.76      0.91      0.83        86
          2       0.58      0.31      0.40        36

avg / total       0.70      0.73      0.70       122

accuracy is:  0.729508196721
MSE: 0.270491803279


In [35]:
ensemble_Agr = VotingClassifier(estimators=[('dt', tree_Agr), ('knn', knn_Agr), ('svm', svm_Agr),('nb',model_Agr)],
                                 voting='soft')
X_comtrain = scipy.sparse.hstack((X_train_vec, X_train))
X_comtest=scipy.sparse.hstack((X_test_vec,X_test))
ensemble_Agr.fit(X_comtrain,y_train["Agr"])
y_hat = ensemble_Agr.predict(X_comtest)
print("Extraversion: ", metrics.classification_report(y_test["Agr"], y_hat))
print("accuracy is: ", metrics.accuracy_score(y_test["Agr"], y_hat))
print("MSE:",metrics.mean_squared_error(y_test['Agr'],y_hat))

Extraversion:               precision    recall  f1-score   support

          1       0.70      0.95      0.81        86
          2       0.20      0.03      0.05        36

avg / total       0.55      0.68      0.58       122

accuracy is:  0.680327868852
MSE: 0.319672131148


In [36]:
ensemble_Cons = VotingClassifier(estimators=[('dt', tree_Cons), ('knn', knn_Cons), ('svm', svm_Cons),('nb',model_Cons)],
                                 voting='soft')
X_comtrain = scipy.sparse.hstack((X_train_vec, X_train))
X_comtest=scipy.sparse.hstack((X_test_vec,X_test))
ensemble_Cons.fit(X_comtrain,y_train["Cons"])
y_hat = ensemble_Cons.predict(X_comtest)
print("Extraversion: ", metrics.classification_report(y_test["Cons"], y_hat))
print("accuracy is: ", metrics.accuracy_score(y_test["Cons"], y_hat))
print("MSE:",metrics.mean_squared_error(y_test['Cons'],y_hat))

Extraversion:               precision    recall  f1-score   support

          1       0.74      1.00      0.85        89
          2       1.00      0.06      0.11        33

avg / total       0.81      0.75      0.65       122

accuracy is:  0.745901639344
MSE: 0.254098360656


In [37]:
ensemble_Emot = VotingClassifier(estimators=[('dt', tree_Emot), ('knn', knn_Emot), ('svm', svm_Emot),('nb',model_Emot)],
                                 voting='soft')
X_comtrain = scipy.sparse.hstack((X_train_vec, X_train))
X_comtest=scipy.sparse.hstack((X_test_vec,X_test))
ensemble_Emot.fit(X_comtrain,y_train["Emot"])
y_hat = ensemble_Emot.predict(X_comtest)
print("Extraversion: ", metrics.classification_report(y_test["Emot"], y_hat))
print("accuracy is: ", metrics.accuracy_score(y_test["Emot"], y_hat))
print("MSE:",metrics.mean_squared_error(y_test['Emot'],y_hat))

Extraversion:               precision    recall  f1-score   support

          1       0.66      1.00      0.80        81
          2       0.00      0.00      0.00        41

avg / total       0.44      0.66      0.53       122

accuracy is:  0.66393442623
MSE: 0.33606557377


  'precision', 'predicted', average, warn_for)


In [38]:
ensemble_Open = VotingClassifier(estimators=[('dt', tree_Open), ('knn', knn_Open), ('svm', svm_Open),('nb',model_Open)],
                                 voting='soft')
X_comtrain = scipy.sparse.hstack((X_train_vec, X_train))
X_comtest=scipy.sparse.hstack((X_test_vec,X_test))
ensemble_Open.fit(X_comtrain,y_train["Emot"])
y_hat = ensemble_Open.predict(X_comtest)
print("Extraversion: ", metrics.classification_report(y_test["Open"], y_hat))
print("accuracy is: ", metrics.accuracy_score(y_test["Open"], y_hat))
print("MSE:",metrics.mean_squared_error(y_test['Open'],y_hat))

Extraversion:               precision    recall  f1-score   support

          1       0.73      0.98      0.84        90
          2       0.00      0.00      0.00        32

avg / total       0.54      0.72      0.62       122

accuracy is:  0.72131147541
MSE: 0.27868852459
