In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn import decomposition


In [2]:
# Load the raw training table. A missing x6 is kept as its own "Empty"
# category instead of being discarded by the dropna() at the end.
data = pd.read_csv("./TrainOnMe-6.csv")
data["x6"] = data["x6"].fillna("Empty")

# Flag rows holding known-corrupt entries: a stray "Ostra stationen" in
# column 6, a "?" placeholder in column 1, or one of three label spellings
# in column 0 that appear to have lost their first letter.
corrupt_rows = (
    (data.iloc[:, 6] == "Ostra stationen")
    | (data.iloc[:, 1] == "?")
    | data.iloc[:, 0].isin(["yckelharpa", "erpent", "ragspel"])
)
data = data.loc[~corrupt_rows]

# Keep only rows whose x11 / x12 are exactly the strings "True" or "False";
# any other spelling is treated as noise.
for flag_col in ('x11', 'x12'):
    data = data[data[flag_col].isin(["True", "False"])]

# Turn the textual booleans into floats, then discard any row that still has
# a missing value.
data = data.replace("True", 1.0).replace("False", 0.0).dropna()


# Separate the target (first column, named 'y') from the feature columns.
y = data.iloc[:, 0].values
X = data.drop(columns=['y'])

# Integer-encode the labels: one-hot them and take the position of the hot
# column, so classes are numbered in the column order get_dummies produces.
np.set_printoptions(threshold=np.inf)
y = pd.get_dummies(y).values.argmax(axis=1)

# One-hot encode x6 (feature position 5) and splice the indicator columns
# back in at the place the original column occupied.
x6_pos = 5
X_encoded = pd.get_dummies(X.iloc[:, x6_pos], prefix=X.columns[x6_pos])
X = pd.concat(
    [X.iloc[:, :x6_pos], X_encoded, X.iloc[:, x6_pos + 1:]],
    axis=1,
)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



In [3]:

# Base decision trees for boosting. Tree depth is tied to the number of
# features. Floor division keeps max_depth an integer: scikit-learn releases
# with parameter validation reject a float here. The max(1, ...) guard keeps
# the depth valid when the feature matrix is very narrow.
n_features = X_train.shape[1]
dct_clf1 = DecisionTreeClassifier(max_depth=max(1, n_features // 2 - 1))
dct_clf2 = DecisionTreeClassifier(max_depth=max(1, n_features // 2))

# Random forests of varying size and depth.
rf_clf1 = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
rf_clf2 = RandomForestClassifier(n_estimators=200, max_depth=15, random_state=2022)
rf_clf3 = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=666)

# AdaBoost ensembles over the shallow tree. The base learner is passed
# positionally because its keyword was renamed from `base_estimator` to
# `estimator` in scikit-learn 1.2 (old name removed in 1.4); the positional
# form works on both sides of that change.
# NOTE(review): all three boosters use dct_clf1 and dct_clf2 is never used --
# confirm whether one of them was meant to use the deeper tree.
ada_clf1 = AdaBoostClassifier(dct_clf1, n_estimators=50, learning_rate=0.1, random_state=42)
ada_clf2 = AdaBoostClassifier(dct_clf1, n_estimators=100, learning_rate=0.5, random_state=2022)
ada_clf3 = AdaBoostClassifier(dct_clf1, n_estimators=20, learning_rate=0.01, random_state=666)


# Soft-voting ensemble over the three forests and three boosters.
voting_clf = VotingClassifier(
    estimators=[
        ('rf1', rf_clf1),
        ('rf2', rf_clf2),
        ('rf3', rf_clf3),
        ('ada1', ada_clf1),
        ('ada2', ada_clf2),
        ('ada3', ada_clf3),
    ],
    voting='soft',
)

# Train the ensemble plus one standalone booster and one standalone forest,
# so the ensemble's held-out accuracy can be compared against single models.
for clf in (voting_clf, ada_clf1, rf_clf1):
    clf.fit(X_train, y_train)

# Predict on the held-out split with each fitted model.
y_pred_vote = voting_clf.predict(X_test)
y_pred_ada = ada_clf1.predict(X_test)
y_pred_rf = rf_clf1.predict(X_test)

# Score every prediction vector against the true test labels.
accuracy_vote, accuracy_ada, accuracy_rf = (
    accuracy_score(y_test, predicted)
    for predicted in (y_pred_vote, y_pred_ada, y_pred_rf)
)

# Report the three accuracies in a fixed order.
for caption, score in (
    ("Voting Classifier Accuracy:", accuracy_vote),
    ("Ada Classifier Accuracy:", accuracy_ada),
    ("Rf Classifier Accuracy:", accuracy_rf),
):
    print(caption, score)

Voting Classifier Accuracy: 0.7286432160804021
Ada Classifier Accuracy: 0.6984924623115578
Rf Classifier Accuracy: 0.7085427135678392


In [None]:
# Load the unlabeled evaluation set and apply the same value-level
# preprocessing as the training data: "Empty" for a missing x6 and textual
# booleans converted to floats. No rows are dropped here, so there is one
# prediction per input row.
val_Set = pd.read_csv("./EvaluateOnMe-6.csv")
val_Set["x6"] = val_Set["x6"].fillna("Empty")
val_Set = val_Set.replace("True", 1.0)
val_Set = val_Set.replace("False", 0.0)

# One-hot encode the categorical column at position 5 and splice it back in.
val_prefix = val_Set.columns[5]
val_encoded = pd.get_dummies(val_Set.iloc[:, 5], prefix=val_prefix)
val_X = pd.concat([val_Set.iloc[:, :5], val_encoded, val_Set.iloc[:, 6:]], axis=1)

# The evaluation file is encoded independently of the training file, so its
# indicator columns can differ (a category absent here, or one unseen during
# training). Align to the exact training feature layout: absent indicators
# become 0 and columns the model never saw are dropped. Any other missing
# feature is a real schema problem and must not be silently zero-filled.
missing_cols = [col for col in X_train.columns if col not in val_X.columns]
unexpected_missing = [
    col for col in missing_cols
    if not str(col).startswith("%s_" % val_prefix)
]
if unexpected_missing:
    raise ValueError(
        "Evaluation data lacks training features: %s" % unexpected_missing
    )
val_X = val_X.reindex(columns=X_train.columns, fill_value=0)

y_out = voting_clf.predict(val_X)

# Class index -> label name, in the order the integer codes are written out.
# An index outside this tuple raises IndexError instead of silently skipping
# the row, which would leave the results file misaligned with the input.
label_names = ("Dragspel", "Nyckelharpa", "Serpent")

with open('./Results.txt', 'w') as f:
    f.writelines("%s\n" % label_names[code] for code in y_out)