In [54]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [55]:
# Seed handed to `random_state` arguments (e.g. the train/test split) so that
# randomised steps give the same result on every Restart & Run All.
seed = 42

In [56]:
# Load the raw table; row 0 of the comma-separated file holds the column names.
df = pd.read_csv(
    'party_data.csv',
    sep=',',
    header=0,
)

# Every column except the last is treated as a feature; non-numeric feature
# columns are expanded into one-hot indicator columns.
feature_columns = df.iloc[:, :-1]
X = pd.get_dummies(feature_columns)

# The last column is the label. It is one-hot encoded with the first level
# dropped, which leaves a single indicator column when the label has exactly
# two classes (presumably the case here -- TODO confirm against the data).
label_column = df.iloc[:, -1]
y = pd.get_dummies(label_column, drop_first=True)

In [57]:
# Hold out 20% of the rows for evaluation; the shared seed keeps the split
# identical across runs so model accuracies are comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)

In [58]:
# K-nearest neighbours: compare held-out accuracy for several neighbourhood sizes.
ks = [3, 5, 11, 17]

# y_train is a one-column frame; flatten it once to the 1-D label vector that
# the estimator is fitted on.
knn_train_labels = y_train.values.ravel()

for k in ks:
    # fit() returns the estimator itself, so `knn` is the fitted model.
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, knn_train_labels)
    y_pred = knn.predict(X_test)
    knn_accuracy = accuracy_score(y_test, y_pred)
    print(f'KNN with k={k} accuracy: {knn_accuracy}')

KNN with k=3 accuracy: 0.685
KNN with k=5 accuracy: 0.72
KNN with k=11 accuracy: 0.77
KNN with k=17 accuracy: 0.75


In [59]:
# Logistic regression: compare an L2-penalised fit with an unpenalised one.
penalty = ['l2', None]

# y_train is a one-column frame; flatten it once to the 1-D label vector that
# the estimator is fitted on.
log_reg_train_labels = y_train.values.ravel()

for o in penalty:
    # fit() returns the estimator itself, so `log_reg` is the fitted model.
    log_reg = LogisticRegression(penalty=o).fit(X_train, log_reg_train_labels)
    y_pred = log_reg.predict(X_test)
    log_reg_accuracy = accuracy_score(y_test, y_pred)
    print(f'Logistic Regression with penalty={o} accuracy: {log_reg_accuracy}')

Logistic Regression with penalty=l2 accuracy: 0.695
Logistic Regression with penalty=None accuracy: 0.68


In [60]:
# Decision tree: compare held-out accuracy for the Gini and entropy split criteria.
criteria = ['gini', 'entropy']
for c in criteria:
    # scikit-learn permutes the candidate features at every split, so ties
    # between equally good splits are broken randomly. Passing the notebook's
    # seed makes the fitted tree -- and therefore the reported accuracy --
    # repeatable from run to run.
    dt = DecisionTreeClassifier(criterion=c, random_state=seed)
    # y_train is a one-column frame; flatten it to a 1-D label vector for fit().
    dt.fit(X_train, y_train.values.ravel())
    y_pred = dt.predict(X_test)
    print(f'Decision Tree with criterion={c} accuracy: {accuracy_score(y_test, y_pred)}')


Decision Tree with criterion=gini accuracy: 0.65
Decision Tree with criterion=entropy accuracy: 0.655
