In [1]:
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.neural_network import MLPClassifier
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from collections import Counter

In [2]:
def eval_model(model, test_data, test_labels):
    """Return the accuracy of ``model`` on the given test set.

    The model's predictions are rounded to zero decimal places before they
    are compared with ``test_labels``.

    :param model: object exposing a ``predict(test_data)`` method
    :param test_data: samples handed to ``model.predict``
    :param test_labels: reference labels the predictions are scored against
    :return: the value returned by ``accuracy_score``
    """
    raw_output = np.array(model.predict(test_data))
    rounded = list(np.around(raw_output, decimals=0))
    return accuracy_score(test_labels, rounded)

In [3]:
def other_class(n_classes, current_class):
    """
    Pick one class index at random that differs from ``current_class``.

    :param n_classes: number of classes in the task; valid class indices are
        0 .. n_classes - 1
    :param current_class: the class index to be excluded; must be a whole
        number (an integral float such as 3.0 is accepted)
    :return: one random class index that != current_class
    :raises ValueError: if current_class is out of range or not a whole
        number, or if there is no other class to choose from
    """
    if current_class < 0 or current_class >= n_classes:
        error_str = "current_class must be within the range [0, n_classes - 1]"
        raise ValueError(error_str)

    # A fractional label (e.g. 1.5) matches no class index; reject it here
    # with a clear message instead of silently treating every class as "other".
    if not float(current_class).is_integer():
        raise ValueError("current_class must be a whole number, got %r" % (current_class,))

    # Integral floats compare equal to ints, so 3.0 correctly excludes class 3.
    candidates = [c for c in range(n_classes) if c != current_class]
    if not candidates:
        # Only possible when n_classes == 1: there is nothing to switch to.
        raise ValueError("n_classes must be at least 2 to pick a different class")

    return np.random.choice(candidates)

def inject_noise(n_classes, y_, noise_level):
    """Return a copy of ``y_`` in which a share of the labels is replaced.

    ``noise_level`` is a percentage: ``int(len(y_) * noise_level / 100.0)``
    positions are drawn without replacement and each selected label is
    replaced by ``other_class(n_classes, label)``. The input ``y_`` is left
    untouched; all changes are made on the copy.

    :param n_classes: number of classes, forwarded to ``other_class``
    :param y_: label container supporting ``copy()``, ``len()`` and indexing
    :param noise_level: percentage of labels to replace, between 0 and 100
    :return: the modified copy of ``y_``
    :raises ValueError: if noise_level is below 0 or above 100
    """
    noisy_labels = y_.copy()
    if noise_level < 0 or noise_level > 100:
        raise ValueError('Noise level can not be bigger than 100 or smaller than 0')

    n_samples = len(noisy_labels)
    n_flips = int(n_samples * noise_level / 100.0)
    flip_positions = np.random.choice(n_samples, n_flips, replace=False)
    for pos in flip_positions:
        noisy_labels[pos] = other_class(n_classes, noisy_labels[pos])

    return noisy_labels

In [4]:
# All files live under data/ and are named "<dataset>-<split>-<kind>.txt".
dataset = "thermostat-original"

# Test split: only the first 6000 rows of each file are kept.
test_labels, test_data = (
    np.loadtxt("data/" + dataset + suffix)[:6000]
    for suffix in ("-test-labels.txt", "-test-data.txt")
)

# Training split: loaded in full.
train_labels, train_data = (
    np.loadtxt("data/" + dataset + suffix)
    for suffix in ("-train-labels.txt", "-train-data.txt")
)

# Number of distinct label values present in the training labels.
n_classes = len(set(train_labels))

In [5]:
# Tally how many training samples carry each label value.
label_counts = Counter(train_labels)
print(label_counts.keys())    # the distinct label values
print(label_counts.values())  # number of samples per label, in matching order

[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
[1217, 1281, 1332, 1244, 1259, 1231, 1243, 1273, 1308, 1335, 1277]


KNN thermostat test: the results averaged over 10 runs are shown at the end of the output.

In [9]:
# KNN thermostat
# For each noise level, fit a distance-weighted 4-nearest-neighbour classifier
# on training labels corrupted at that level, score it against the untouched
# test labels, and finally average the accuracy per noise level over all runs.
knnmodel = KNeighborsClassifier(n_neighbors=4, weights = 'distance')
noise_levels = [0,10,20,30,40,50,60,70,80,90,100]  # percentage of training labels replaced
n_levels = len(noise_levels)
# Flat, run-major list: the accuracy of run i at noise_levels[j] is stored
# at index j + i*n_levels.
acc_list_knn = []
repeat = 10
for i in range(repeat):
    for noise in noise_levels:
        # A new noisy copy is drawn on every iteration; the draw uses NumPy's
        # global RNG, which this cell does not seed.
        train_noisy_labels = inject_noise(n_classes, train_labels, noise)
        knnmodel.fit(train_data, train_noisy_labels)
        acc = eval_model(knnmodel, test_data, test_labels)
        acc_list_knn.append(acc)
        print("noise_level, accuracy", noise, acc)

# Sum the runs per noise level (one slice of n_levels entries per run), then
# divide by the number of runs. The width comes from noise_levels, so editing
# that list cannot desynchronise the averaging.
avg_result = np.zeros(n_levels)
for i in range(repeat):
    avg_result += acc_list_knn[i*n_levels:(i + 1)*n_levels]
average = avg_result/repeat
print(average)

('noise_level, accuracy', 0, 0.9803333333333333)
('noise_level, accuracy', 10, 0.9513333333333334)
('noise_level, accuracy', 20, 0.9003333333333333)
('noise_level, accuracy', 30, 0.8305)
('noise_level, accuracy', 40, 0.741)
('noise_level, accuracy', 50, 0.6205)
('noise_level, accuracy', 60, 0.49083333333333334)
('noise_level, accuracy', 70, 0.36483333333333334)
('noise_level, accuracy', 80, 0.22133333333333333)
('noise_level, accuracy', 90, 0.099)
('noise_level, accuracy', 100, 0.0005)
('noise_level, accuracy', 0, 0.9803333333333333)
('noise_level, accuracy', 10, 0.9475)
('noise_level, accuracy', 20, 0.9005)
('noise_level, accuracy', 30, 0.8236666666666667)
('noise_level, accuracy', 40, 0.7311666666666666)
('noise_level, accuracy', 50, 0.6246666666666667)
('noise_level, accuracy', 60, 0.5001666666666666)
('noise_level, accuracy', 70, 0.364)
('noise_level, accuracy', 80, 0.22066666666666668)
('noise_level, accuracy', 90, 0.10216666666666667)
('noise_level, accuracy', 100, 0.001666666666

MLP thermostat test: the results averaged over 10 runs are shown at the end of the output.

In [10]:
# MLP thermostat
# For each noise level, fit an MLP (two hidden layers of 28 units, adam
# solver, random_state=1) on training labels corrupted at that level, score
# it against the untouched test labels, and finally average the accuracy per
# noise level over all runs.
mlpmodel = MLPClassifier(solver='adam', hidden_layer_sizes=(28,28), random_state=1)
noise_levels = [0,10,20,30,40,50,60,70,80,90,100]  # percentage of training labels replaced
n_levels = len(noise_levels)
# Flat, run-major list: the accuracy of run i at noise_levels[j] is stored
# at index j + i*n_levels.
acc_list_mlp = []
repeat = 10
for i in range(repeat):
    for noise in noise_levels:
        # A new noisy copy is drawn on every iteration; the draw uses NumPy's
        # global RNG, which this cell does not seed.
        train_noisy_labels = inject_noise(n_classes, train_labels, noise)
        mlpmodel.fit(train_data, train_noisy_labels)
        acc = eval_model(mlpmodel, test_data, test_labels)
        acc_list_mlp.append(acc)
        print("noise_level, accuracy", noise, acc)

# Sum the runs per noise level (one slice of n_levels entries per run), then
# divide by the number of runs. The width comes from noise_levels, so editing
# that list cannot desynchronise the averaging.
avg_result = np.zeros(n_levels)
for i in range(repeat):
    avg_result += acc_list_mlp[i*n_levels:(i + 1)*n_levels]
average = avg_result/repeat
print(average)

('noise_level, accuracy', 0, 0.8956666666666667)




('noise_level, accuracy', 10, 0.8763333333333333)
('noise_level, accuracy', 20, 0.8798333333333334)
('noise_level, accuracy', 30, 0.8601666666666666)
('noise_level, accuracy', 40, 0.8495)
('noise_level, accuracy', 50, 0.8421666666666666)
('noise_level, accuracy', 60, 0.8295)
('noise_level, accuracy', 70, 0.7818333333333334)
('noise_level, accuracy', 80, 0.7225)
('noise_level, accuracy', 90, 0.1085)
('noise_level, accuracy', 100, 0.0008333333333333334)
('noise_level, accuracy', 0, 0.8956666666666667)
('noise_level, accuracy', 10, 0.8841666666666667)
('noise_level, accuracy', 20, 0.8676666666666667)
('noise_level, accuracy', 30, 0.844)
('noise_level, accuracy', 40, 0.8558333333333333)
('noise_level, accuracy', 50, 0.8451666666666666)
('noise_level, accuracy', 60, 0.8206666666666667)
('noise_level, accuracy', 70, 0.7906666666666666)
('noise_level, accuracy', 80, 0.7308333333333333)
('noise_level, accuracy', 90, 0.2985)
('noise_level, accuracy', 100, 0.0015)
('noise_level, accuracy', 0, 0.

NearestCentroid thermostat test: the results averaged over 10 runs are shown at the end of the output.

In [11]:
# NearestCentroid thermostat
# For each noise level, fit a NearestCentroid classifier (default settings)
# on training labels corrupted at that level, score it against the untouched
# test labels, and finally average the accuracy per noise level over all runs.
ncmodel = NearestCentroid()
noise_levels = [0,10,20,30,40,50,60,70,80,90,100]  # percentage of training labels replaced
n_levels = len(noise_levels)
# Flat, run-major list: the accuracy of run i at noise_levels[j] is stored
# at index j + i*n_levels.
acc_list_nc = []
repeat = 10
for i in range(repeat):
    for noise in noise_levels:
        # A new noisy copy is drawn on every iteration; the draw uses NumPy's
        # global RNG, which this cell does not seed.
        train_noisy_labels = inject_noise(n_classes, train_labels, noise)
        ncmodel.fit(train_data, train_noisy_labels)
        acc = eval_model(ncmodel, test_data, test_labels)
        acc_list_nc.append(acc)
        print("noise_level, accuracy", noise, acc)

# Sum the runs per noise level (one slice of n_levels entries per run), then
# divide by the number of runs. The width comes from noise_levels, so editing
# that list cannot desynchronise the averaging.
avg_result = np.zeros(n_levels)
for i in range(repeat):
    avg_result += acc_list_nc[i*n_levels:(i + 1)*n_levels]
average = avg_result/repeat
print(average)

('noise_level, accuracy', 0, 0.6176666666666667)
('noise_level, accuracy', 10, 0.5923333333333334)
('noise_level, accuracy', 20, 0.5691666666666667)
('noise_level, accuracy', 30, 0.5398333333333334)
('noise_level, accuracy', 40, 0.5145)
('noise_level, accuracy', 50, 0.561)
('noise_level, accuracy', 60, 0.4855)
('noise_level, accuracy', 70, 0.47433333333333333)
('noise_level, accuracy', 80, 0.4435)
('noise_level, accuracy', 90, 0.13433333333333333)
('noise_level, accuracy', 100, 0.0)
('noise_level, accuracy', 0, 0.6176666666666667)
('noise_level, accuracy', 10, 0.594)
('noise_level, accuracy', 20, 0.5868333333333333)
('noise_level, accuracy', 30, 0.5636666666666666)
('noise_level, accuracy', 40, 0.529)
('noise_level, accuracy', 50, 0.5716666666666667)
('noise_level, accuracy', 60, 0.4696666666666667)
('noise_level, accuracy', 70, 0.495)
('noise_level, accuracy', 80, 0.4776666666666667)
('noise_level, accuracy', 90, 0.21083333333333334)
('noise_level, accuracy', 100, 0.0)
('noise_level, 