## Simple Classifiers

In [34]:
import pandas as pd
import numpy as np

In [3]:
# Read data from tsv into dataframe
trainData = pd.read_csv ('../data/LIAR-PLUS/dataset/tsv/train2.tsv', sep='\t', header=None)
testData  = pd.read_csv ('../data/LIAR-PLUS/dataset/tsv/test2.tsv', sep='\t', header=None)

# Add collumn names in the data.
columnNames = ['#', 'jsonIndex', 'truthValue', 'statement', 'theme', 'speaker', 'position', 'party', 'state','a1', 'a2', 'a3', 'a4', 'a5', 'location', 'explanation']
trainData.columns = columnNames
testData.columns = columnNames

In [4]:
# Remove unwanted columns.
def removeCols(dataFrame, colsList=[]):
    for colName in colsList:
        if colName in dataFrame.columns:
            del dataFrame[colName]

In [5]:
# Select and remove unwanted columns
unwantedCollumns = ['#','jsonIndex', 'theme', 'speaker', 'position', 'party', 'state','a1', 'a2', 'a3', 'a4', 'a5', 'location']

removeCols(trainData, unwantedCollumns)
removeCols(testData, unwantedCollumns)

# Remove NaN values and fix indexing.
trainData.dropna(inplace=True)
trainData.reset_index(drop=True, inplace=True)

testData.dropna(inplace=True)
testData.reset_index(drop=True, inplace=True)

In [6]:
# Transform truth values from six classes to two. 'True' and 'False'
def transformTruthValues(dataFrame, trueLabels=[], falseLabels=[]):
    
    trueList  = []
    falseList = []

    for index, row in dataFrame.iterrows():
        if row['truthValue'] in trueLabels:
            dataFrame.at[index, 'truthValue'] = 'true'
            trueList.append(dataFrame.iloc[index].values)
        else:
            dataFrame.at[index, 'truthValue'] = 'false'
            falseList.append(dataFrame.iloc[index].values)

    basicColumns = ['truthValue', 'statement', 'explanation']
    trueData  = pd.DataFrame(trueList,  columns=['truthValue', 'statement', 'explanation'])
    falseData = pd.DataFrame(falseList, columns=['truthValue', 'statement', 'explanation'])
    
    return trueData, falseData

In [7]:
# Transform truth values from six classes to two. 'True' and 'False'

trueLabels  = ['mostly-true', 'true']
falseLabels = ['pants-fire', 'false', 'barely-true', 'half-true']

trueTrainData, falseTrainData = transformTruthValues(trainData, trueLabels, falseLabels)
trueTestData,  falseTestData  = transformTruthValues(testData, trueLabels, falseLabels)

In [8]:
# Set pandas options show that dataframes appear not truncated.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Select a small number of data.
selectedTrainData = falseTrainData[: 20]
selectedTestData  = falseTestData[: 20]

# Temporarily add explanationClass manually.
trainExplanationClass = ['unfounded', 'emphasis', 'exaggeration', 'emphasis', 'exaggeration', 
                    'unfounded', 'emphasis', 'unfounded', 'exaggeration', 'emphasis',
                    'unfounded', 'exaggeration', 'unfounded', 'exaggeration', 'exaggeration',
                    'distortion', 'distortion', 'unfounded', 'emphasis', 'distortion']
selectedTrainData['explanationClass'] = trainExplanationClass

testExplanationClass = ['unfounded', 'emphasis', 'exaggeration', 'emphasis', 'exaggeration', 
                    'unfounded', 'emphasis', 'unfounded', 'exaggeration', 'emphasis',
                    'unfounded', 'exaggeration', 'unfounded', 'exaggeration', 'exaggeration',
                    'distortion', 'distortion', 'unfounded', 'emphasis', 'distortion']
selectedTestData['explanationClass'] = testExplanationClass


display(selectedTrainData)
display(selectedTestData)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selectedTrainData['explanationClass'] = trainExplanationClass
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selectedTestData['explanationClass'] = testExplanationClass


Unnamed: 0,truthValue,statement,explanation,explanationClass
0,False,Says the Annies List political group supports third-trimester abortions on demand.,"That's a premise that he fails to back up. Annie's List makes no bones about being comfortable with candidates who oppose further restrictions on late-term abortions. Then again, this year its backing two House candidates who voted for more limits.",unfounded
1,False,When did the decline of coal start? It started when natural gas took off that started to begin in (President George W.) Bushs administration.,"Surovell said the decline of coal ""started when natural gas took off That started to begin in President (George W. ) Bushs administration. ""No doubt, natural gas has been gaining ground on coal in generating electricity. The trend started in the 1990s but clearly gained speed during the Bush administration when the production of natural gas -- a competitor of coal -- picked up. But analysts give little credit or blame to Bush for that trend. They note that other factors, such as technological innovation, entrepreneurship and policies of previous administrations, had more to do with laying the groundwork for the natural gas boom.",emphasis
2,False,Health care reform legislation is likely to mandate free sex change surgeries.,"The release may have a point that Mikulskis comment could open the door to ""medically necessary"" coverage which conceivably may include sex-change operations. But it's unclear whether her amendment will remain in the legislation, and there's nothing specific in the legislation on sex-change procedures and nothing else solid that indicates such coverage will be provided. The news release cherry-picked a few fleeting references to gender and sexual orientation in completely unrelated contexts to argue that proposed health care legislation would mandate free sex-change surgeries (and allow them for illegal aliens, no less).",exaggeration
3,False,The economic turnaround started at the end of my term.,"Crist said that the economic ""turnaround started at the end of my term. ""During Crists last year in office, Floridas economy experienced notable gains in personal income and industrial production, and more marginal improvements in the unemployment rate and in payroll employment. But GDP didnt grow again until Scott took office. Economists say Crist deserves some credit for the economic turnaround because he accepted federal stimulus dollars, but they add that any state is inevitably buffeted by national and international trends far beyond their control.",emphasis
4,False,Jim Dunnam has not lived in the district he represents for years now.,"But determining that would take significant detective work, far more than a few photos. A broader interpretation would allow for the possibility Dunnam hasnt lived exclusively in his House district for years, but instead flits between -- and lives in -- both houses.",exaggeration
5,False,"I'm the only person on this stage who has worked actively just last year passing, along with Russ Feingold, some of the toughest ethics reform since Watergate.","However, it was not that bill, but another one, sponsored by Majority Leader Harry Reid and introduced five days earlier on Jan. 4, 2007 that eventually became law. Obama was not a cosponsor, but he did successfully offer an amendment concerning lobbyist disclosure and the bill included some elements from the one he had introduced the previous year. Obama, as well as Biden, Clinton and Dodd all voted for it. In the House, Kucinich also voted for it.",unfounded
6,False,"However, it took $19.5 million in Oregon Lottery funds for the Port of Newport to eventually land the new NOAA Marine Operations Center-Pacific.",But Johnson is correct that many other factors played a role and for the Lottery to suggest that its money was the final piece is incomplete. The statement is partially accurate -- Lottery money made the lease competitive -- but it leaves out important details. Astoria dropped out; Newports bid was better on technical fronts than the others; legislative leaders pulled together to set aside the dollars and bonding authority.,emphasis
7,False,"Since 2000, nearly 12 million Americans have slipped out of the middle class and into poverty.","So where does this leave us?On Sanders side, he used a solid number, and hes clearly right about the underlying trend -- despite some ups and downs, there are more impoverished Americans today than there were in 2000, both measured by raw numbers and as a percentage of the population. But we think he goes too far when he suggests these 12 million Americans simply fell from the middle class into poverty. And he ignores the fact that as those 12 million Americans were slipping into poverty, many, many Americans were simultaneously climbing out of poverty.",unfounded
8,False,Most of the (Affordable Care Act) has already in some sense been waived or otherwise suspended.,"With all the talk about problems with the health care law, its easy to forget just how many corners of health insurance policy the law affects. Will seemed guilty of that on Fox News Sunday when he said most of the law has been waived or suspended. Obama has made at least two major course corrections in the law. He delayed the employer mandate by one year and allowed individuals dealing with canceled plans to buy insurance that fails to meet minimum standards under the law. One way to put these changes into context is to look at the number of people they affect. While precise data are missing, the combined impact could be in the range of 1 million Americans. On the other side of the ledger, 3. 1 million young adults have gained access to coverage through the law, another 18 million people are subject to the individual mandate and millions more have benefited from requirements that health insurance companies spend premiums on care.",exaggeration
9,False,"In this last election in November, ... 63 percent of the American people chose not to vote, ... 80 percent of young people, (and) 75 percent of low-income workers chose not to vote.","Sanders said that ""in this last election in November, . . . 63 percent of the American people chose not to vote, . . . 80 percent of young people, (and) 75 percent of low-income workers chose not to vote. ""Sanders was too loose with some of his numbers and his wording, but he has a point that rates of non-voting among Americans, and especially among younger and poorer Americans, are high.",emphasis


Unnamed: 0,truthValue,statement,explanation,explanationClass
0,False,Wisconsin is on pace to double the number of layoffs this year.,"She cited layoff notices received by the state. But those arent actual layoffs. In the time frame she cited the states added about 30,300 jobs.",unfounded
1,False,Says John McCain has done nothing to help the vets.,"Trump said that McCain ""has done nothing to help the vets. ""While many veterans groups have had their differences with McCain over the years over specific legislation and his general approach to veterans issues, thats not the same as saying hes done ""nothing"" for veterans. In fact, just within the past two years, McCain has sponsored and helped enact several major provisions to help veterans. He also devotes a significant portion of his office staff to offer veterans on casework.",emphasis
2,False,Suzanne Bonamici supports a plan that will cut choice for Medicare Advantage seniors.,"But spending still goes up. In addition, many outside factors can affect the cost and range of benefits, making it impossible to know how Medicare Advantage might change. While the statement from Cornilles is partially accurate, it is taken out of context and ignores important details on a politically volatile subject.",exaggeration
3,False,"When asked by a reporter whether hes at the center of a criminal scheme to violate campaign laws, Gov. Scott Walker nodded yes.","Our rating A Democratic Party web video making the rounds on social media shows a grim-faced Gov. Scott Walker appearing to bob his head yes to a reporters question about whether he was at the center of a ""criminal scheme"" to evade campaign finance laws. In real life, the governor answered an emphatic ""no"" -- not surprising given hes been denying any wrongdoing since new documents were released in the John Doe investigation.",emphasis
4,False,"Says Vice President Joe Biden ""admits that the American people are being scammed"" with the economic stimulus package.","Boehner may be technically correct that Biden mentioned people being scammed in the roundtable meeting. But Boehner incorrectly suggests the vice president called the stimulus a scam and he fails to note that Biden promised ""to expose abuses whenever they were detected,"" as the AP reported.",exaggeration
5,False,We know that more than half of Hillary Clintons meetings while she was secretary of state were given to major contributors to the Clinton Foundation.,"Pence said, ""We know that more than half of Hillary Clinton's meetings while she was secretary of state were given to major contributors of the Clinton Foundation. ""Pence inaccurately described an Associated Press report. The report found that of 154 meetings Clinton took with private individuals over about half of her time as secretary of state, 85 were with people who had donated to the foundation. But the analysis leaves out thousands of meetings Clinton took, including every time she met with employees of both U. S. and foreign governments. It also only covers part of her time as secretary of state.",unfounded
6,False,We know there are more Democrats in Georgia than Republicans. We know that for a fact.,"While more people voted Democrat in the 2008 primary, Republicans dominated in the 2010 statewide races, and more Georgians voted on the GOP ballot in July. The polling data says its close, but election results and the most recent makeup of the Georgia Legislature suggest there are more Georgians who consider themselves Republican.",emphasis
7,False,PolitiFact Texas says Congressman Edwards attacks on Bill Flores are false.,We informed Mackowiak of the date conflict. He later told us that the campaign had fixed the date and sent new copies of the ad to the outlets running the ads.,unfounded
8,False,Denali is the Kenyan word for black power.,"The word ""Denali"" doesnt show up in Swahili, one of Kenyas two national languages. Instead, its Koyukon Athabaskan for ""high"" or ""tall.",exaggeration
9,False,"Says 57 percent of federal spending goes to the military and just 1 percent goes to food and agriculture, including food stamps.","A social-media meme says that 57 percent of federal spending goes to the military and just 1 percent goes to food and agriculture, including food stamps. To get numbers that approximate this, the pie chart cherry-picks just discretionary spending. But that means the pie chart represents only about one-third of federal spending. Once you include the 60 percent of the budget that is mandatory spending, the military share plunges from 57 percent to 16 percent, and the categories that include Social Security, Medicare and Medicaid collectively account for a majority of federal spending. Spending on food and agriculture is still small, but it does quadruple from 1 percent to 4 percent. Due to its skewed methodology, the pie chart offers a deeply distorted picture of federal spending.",emphasis


#### Data preprocessing steps

<ul>
  <li>Remove statements with short explanations.(&lt;=5 sentences)</li>
  <li>Remove stop words</li>
  <li>Stem words</li>
</ul>

In [51]:
# Create a list containing statemens + explanations

trainStateExpl = []
yTrainLabels   = selectedTrainData['explanationClass'].tolist()
testStateExpl  = []
yTestLabels    = selectedTestData['explanationClass'].tolist()

for index, row in selectedTrainData.iterrows():
    trainStateExpl.append(row['statement'] + row['explanation'])

for index, row in selectedTestData.iterrows():
    testStateExpl.append(row['statement'] + row['explanation'])

totalStateExpl = trainStateExpl + testStateExpl
yTotalLabels = np.array(yTrainLabels + yTestLabels)

In [112]:
import nltk
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [31]:
# Convert words to number using Bag of Words (Bow)
vectorizer = CountVectorizer(max_features=30, min_df=5, max_df=0.7)
vectorizedTrain = vectorizer.fit_transform(trainStateExpl).toarray()
vectorizedTest  = vectorizer.fit_transform(testStateExpl).toarray()
vectorizedTotal = vectorizer.fit_transform(totalStateExpl).toarray()

# Convert Bow values according to TfIdf
tfidfconverter = TfidfTransformer()
XTrain = tfidfconverter.fit_transform(vectorizedTrain).toarray()
XTest = tfidfconverter.fit_transform(vectorizedTest).toarray()
XTotal = tfidfconverter.fit_transform(vectorizedTotal).toarray()

### Dummy Classifier

In [152]:
from sklearn.dummy import DummyClassifier

# Dummy Classifier With Method Stratified
dummy_clf = DummyClassifier(strategy="stratified")
dummy_clf.fit(XTrain, yTrainLabels)

yPredLabels = dummy_clf.predict(XTest)

# Evaluate Results
print(confusion_matrix(yTestLabels,yPredLabels))
print(classification_report(yTestLabels,yPredLabels))
print(accuracy_score(yTestLabels,yPredLabels))

[[1 0 0 2]
 [0 3 0 2]
 [1 2 0 3]
 [0 1 1 4]]
              precision    recall  f1-score   support

  distortion       0.50      0.33      0.40         3
    emphasis       0.50      0.60      0.55         5
exaggeration       0.00      0.00      0.00         6
   unfounded       0.36      0.67      0.47         6

    accuracy                           0.40        20
   macro avg       0.34      0.40      0.35        20
weighted avg       0.31      0.40      0.34        20

0.4


### Random Forest

In [12]:
from sklearn.ensemble import RandomForestClassifier

# Random Forest Classification Method
randForestClas = RandomForestClassifier(n_estimators=1000, random_state=0)
randForestClas.fit(XTrain,yTrainLabels)

yPredLabels = randForestClas.predict(XTest)

# Evaluate Results
print(confusion_matrix(yTestLabels,yPredLabels))
print(classification_report(yTestLabels,yPredLabels))
print(accuracy_score(yTestLabels,yPredLabels))

[[0 0 1 2]
 [0 3 1 1]
 [1 1 2 2]
 [0 2 2 2]]
              precision    recall  f1-score   support

  distortion       0.00      0.00      0.00         3
    emphasis       0.50      0.60      0.55         5
exaggeration       0.33      0.33      0.33         6
   unfounded       0.29      0.33      0.31         6

    accuracy                           0.35        20
   macro avg       0.28      0.32      0.30        20
weighted avg       0.31      0.35      0.33        20

0.35


In [116]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

numberOfSplits = 4
kf  = KFold(n_splits=numberOfSplits, shuffle=True, random_state=1)
skf = StratifiedKFold(n_splits=numberOfSplits, shuffle=True, random_state=1)

In [98]:
meanAccuracy = 0.0
for train, test in kf.split(XTotal):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    randForestClas.fit(Xtrain,Ytrain)
    
    yPred = randForestClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for Random Forest is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for Random Forest is:  0.22


In [117]:
meanAccuracy = 0.0
for train, test in skf.split(XTotal,yTotalLabels):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    randForestClas.fit(Xtrain,Ytrain)
    
    yPred = randForestClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for Random Forest is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for Random Forest is:  0.28


### K-Neighbors

In [72]:
from sklearn.neighbors import KNeighborsClassifier

# K-Neighbors Classifier
kNeighborsClas = KNeighborsClassifier(n_neighbors=3)
kNeighborsClas.fit(XTrain,yTrainLabels)

yPredLabels = kNeighborsClas.predict(XTest)

# Evaluate Results
print(confusion_matrix(yTestLabels,yPredLabels))
print(classification_report(yTestLabels,yPredLabels))
print(accuracy_score(yTestLabels,yPredLabels))

[[0 0 1 2]
 [1 0 1 3]
 [1 2 2 1]
 [2 1 1 2]]
              precision    recall  f1-score   support

  distortion       0.00      0.00      0.00         3
    emphasis       0.00      0.00      0.00         5
exaggeration       0.40      0.33      0.36         6
   unfounded       0.25      0.33      0.29         6

    accuracy                           0.20        20
   macro avg       0.16      0.17      0.16        20
weighted avg       0.20      0.20      0.19        20

0.2


In [99]:
meanAccuracy = 0.0
for train, test in kf.split(XTotal):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    kNeighborsClas.fit(Xtrain,Ytrain)
    
    yPred = kNeighborsClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.22


In [118]:
meanAccuracy = 0.0
for train, test in skf.split(XTotal,yTotalLabels):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    kNeighborsClas.fit(Xtrain,Ytrain)
    
    yPred = kNeighborsClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.28


### SVM

In [102]:
from sklearn.svm import SVC

# SVM Classifier
svmClas = SVC()
svmClas.fit(XTrain,yTrainLabels)

yPredLabels = svmClas.predict(XTest)

# Evaluate Results
print(confusion_matrix(yTestLabels,yPredLabels))
print(classification_report(yTestLabels,yPredLabels))
print(accuracy_score(yTestLabels,yPredLabels))

[[0 0 1 2]
 [0 0 1 4]
 [0 0 1 5]
 [0 0 2 4]]
              precision    recall  f1-score   support

  distortion       0.00      0.00      0.00         3
    emphasis       0.00      0.00      0.00         5
exaggeration       0.20      0.17      0.18         6
   unfounded       0.27      0.67      0.38         6

    accuracy                           0.25        20
   macro avg       0.12      0.21      0.14        20
weighted avg       0.14      0.25      0.17        20

0.25


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [103]:
meanAccuracy = 0.0
for train, test in kf.split(XTotal):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    svmClas.fit(Xtrain,Ytrain)
    
    yPred = svmClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.22


In [119]:
meanAccuracy = 0.0
for train, test in skf.split(XTotal,yTotalLabels):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    svmClas.fit(Xtrain,Ytrain)
    
    yPred = svmClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.38


### Gaussian Process

In [15]:
from sklearn.gaussian_process import GaussianProcessClassifier

# Gaussian Process Classifier
gaussianProcClas = GaussianProcessClassifier()
gaussianProcClas.fit(XTrain,yTrainLabels)

yPredLabels = gaussianProcClas.predict(XTest)

# Evaluate Results
print(confusion_matrix(yTestLabels,yPredLabels))
print(classification_report(yTestLabels,yPredLabels))
print(accuracy_score(yTestLabels,yPredLabels))

[[0 0 1 2]
 [0 0 1 4]
 [0 1 1 4]
 [0 0 2 4]]
              precision    recall  f1-score   support

  distortion       0.00      0.00      0.00         3
    emphasis       0.00      0.00      0.00         5
exaggeration       0.20      0.17      0.18         6
   unfounded       0.29      0.67      0.40         6

    accuracy                           0.25        20
   macro avg       0.12      0.21      0.15        20
weighted avg       0.15      0.25      0.17        20

0.25


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [107]:
meanAccuracy = 0.0
for train, test in kf.split(XTotal):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    gaussianProcClas.fit(Xtrain,Ytrain)
    
    yPred = gaussianProcClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.22


In [120]:
meanAccuracy = 0.0
for train, test in skf.split(XTotal,yTotalLabels):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    gaussianProcClas.fit(Xtrain,Ytrain)
    
    yPred = gaussianProcClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.43


### Decision Tree

In [16]:
from sklearn.tree import DecisionTreeClassifier

# Decision Tree Classifier
decisionTreeClas = DecisionTreeClassifier()
decisionTreeClas.fit(XTrain,yTrainLabels)

yPredLabels = decisionTreeClas.predict(XTest)

# Evaluate Results
print(confusion_matrix(yTestLabels,yPredLabels))
print(classification_report(yTestLabels,yPredLabels))
print(accuracy_score(yTestLabels,yPredLabels))

[[1 0 0 2]
 [1 2 1 1]
 [1 2 1 2]
 [2 1 1 2]]
              precision    recall  f1-score   support

  distortion       0.20      0.33      0.25         3
    emphasis       0.40      0.40      0.40         5
exaggeration       0.33      0.17      0.22         6
   unfounded       0.29      0.33      0.31         6

    accuracy                           0.30        20
   macro avg       0.30      0.31      0.29        20
weighted avg       0.32      0.30      0.30        20

0.3


In [108]:
meanAccuracy = 0.0
for train, test in kf.split(XTotal):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    decisionTreeClas.fit(Xtrain,Ytrain)
    
    yPred = decisionTreeClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.2


In [121]:
meanAccuracy = 0.0
for train, test in skf.split(XTotal,yTotalLabels):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    decisionTreeClas.fit(Xtrain,Ytrain)
    
    yPred = decisionTreeClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.2


### Ada Boost

In [17]:
from sklearn.ensemble import AdaBoostClassifier

# Ada Boost Classifier
adaBoostClas = AdaBoostClassifier()
adaBoostClas.fit(XTrain,yTrainLabels)

yPredLabels = adaBoostClas.predict(XTest)

# Evaluate Results
print(confusion_matrix(yTestLabels,yPredLabels))
print(classification_report(yTestLabels,yPredLabels))
print(accuracy_score(yTestLabels,yPredLabels))

[[1 0 0 2]
 [1 0 1 3]
 [1 0 1 4]
 [2 0 2 2]]
              precision    recall  f1-score   support

  distortion       0.20      0.33      0.25         3
    emphasis       0.00      0.00      0.00         5
exaggeration       0.25      0.17      0.20         6
   unfounded       0.18      0.33      0.24         6

    accuracy                           0.20        20
   macro avg       0.16      0.21      0.17        20
weighted avg       0.16      0.20      0.17        20

0.2


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [109]:
meanAccuracy = 0.0
for train, test in kf.split(XTotal):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    adaBoostClas.fit(Xtrain,Ytrain)
    
    yPred = adaBoostClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.22


In [122]:
meanAccuracy = 0.0
for train, test in skf.split(XTotal,yTotalLabels):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    adaBoostClas.fit(Xtrain,Ytrain)
    
    yPred = adaBoostClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.18


### Naive Bayes

In [18]:
from sklearn.naive_bayes import GaussianNB

# Naive Bayes Classifier
gaussianNBClas = GaussianNB()
gaussianNBClas.fit(XTrain,yTrainLabels)

yPredLabels = gaussianNBClas.predict(XTest)

# Evaluate Results
print(confusion_matrix(yTestLabels,yPredLabels))
print(classification_report(yTestLabels,yPredLabels))
print(accuracy_score(yTestLabels,yPredLabels))

[[0 1 2 0]
 [0 2 1 2]
 [0 2 2 2]
 [0 1 2 3]]
              precision    recall  f1-score   support

  distortion       0.00      0.00      0.00         3
    emphasis       0.33      0.40      0.36         5
exaggeration       0.29      0.33      0.31         6
   unfounded       0.43      0.50      0.46         6

    accuracy                           0.35        20
   macro avg       0.26      0.31      0.28        20
weighted avg       0.30      0.35      0.32        20

0.35


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [110]:
meanAccuracy = 0.0
for train, test in kf.split(XTotal):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    gaussianNBClas.fit(Xtrain,Ytrain)
    
    yPred = gaussianNBClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.3


In [123]:
meanAccuracy = 0.0
for train, test in skf.split(XTotal,yTotalLabels):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    gaussianNBClas.fit(Xtrain,Ytrain)
    
    yPred = gaussianNBClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.35


### Quadratic Discriminant Analysis

In [19]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Quadratic Discriminant Analysis
quadrDAClas = QuadraticDiscriminantAnalysis()
quadrDAClas.fit(XTrain,yTrainLabels)

yPredLabels = quadrDAClas.predict(XTest)

# Evaluate Results
print(confusion_matrix(yTestLabels,yPredLabels))
print(classification_report(yTestLabels,yPredLabels))
print(accuracy_score(yTestLabels,yPredLabels))

[[1 0 0 2]
 [0 0 2 3]
 [0 0 3 3]
 [0 1 1 4]]
              precision    recall  f1-score   support

  distortion       1.00      0.33      0.50         3
    emphasis       0.00      0.00      0.00         5
exaggeration       0.50      0.50      0.50         6
   unfounded       0.33      0.67      0.44         6

    accuracy                           0.40        20
   macro avg       0.46      0.38      0.36        20
weighted avg       0.40      0.40      0.36        20

0.4




In [111]:
meanAccuracy = 0.0
for train, test in kf.split(XTotal):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    quadrDAClas.fit(Xtrain,Ytrain)
    
    yPred = quadrDAClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.25




In [124]:
meanAccuracy = 0.0
for train, test in skf.split(XTotal,yTotalLabels):
    Xtrain, Xtest, Ytrain, Ytest = XTotal[train], XTotal[test], yTotalLabels[train], yTotalLabels[test]
    quadrDAClas.fit(Xtrain,Ytrain)
    
    yPred = quadrDAClas.predict(Xtest)
    meanAccuracy += accuracy_score(Ytest,yPred)

print("Mean Accuracy Score for kNeighbors is: ", round((meanAccuracy / numberOfSplits),2))

Mean Accuracy Score for kNeighbors is:  0.28


