In [1]:
import sys
sys.path.append('..')

from utils.misc import *
from utils.classifier import *
from utils.visualiser import *

## Loading pre-processed data

In [2]:
# Training data. Every CSV is read with its first column as the index.
# df_train.csv is presumably the combined training set — confirm.
df_train = pd.read_csv("../data/train/df_train.csv", index_col=0)

# One frame per building (b0, b1, b2), read in that order.
df_train_b0, df_train_b1, df_train_b2 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        "../data/train/df_train_b0.csv",
        "../data/train/df_train_b1.csv",
        "../data/train/df_train_b2.csv",
    )
)

## Loading clusters

In [3]:
# Each .npy file is loaded with pickling enabled and unwrapped with .item(),
# which hands back the single Python object stored in the array.
# NOTE(review): allow_pickle=True can run arbitrary code while loading, so
# only point these paths at cluster files produced by this project.
clusters_b0, clusters_b1, clusters_b2 = [
    np.load(npy_path, allow_pickle=True).item()
    for npy_path in (
        "../results/clusters_b0.npy",
        "../results/clusters_b1.npy",
        "../results/clusters_b2.npy",
    )
]

## Parameter tuning using SVM

### Building 0 (k = 10)

In [4]:
# Tuning inputs for building 0: its training frame and clusters, k = 10,
# the 'svm' classifier, and cv = 10 (presumably the number of
# cross-validation folds — confirm against tune_room_in_cluster).
kwargs = dict(
    data=df_train_b0,
    clusters=clusters_b0,
    k=10,
    classifier='svm',
    cv=10,
)

# The call returns four values; they are displayed in the Results section.
(
    b0_sample_count,
    b0_room_count,
    b0_params,
    b0_scores,
) = tune_room_in_cluster(**kwargs)

### Building 1 (k = 10)

In [5]:
# Tuning inputs for building 1: its training frame and clusters, k = 10,
# the 'svm' classifier, and cv = 10 (presumably the number of
# cross-validation folds — confirm against tune_room_in_cluster).
kwargs = dict(
    data=df_train_b1,
    clusters=clusters_b1,
    k=10,
    classifier='svm',
    cv=10,
)

# The call returns four values; they are displayed in the Results section.
(
    b1_sample_count,
    b1_room_count,
    b1_params,
    b1_scores,
) = tune_room_in_cluster(**kwargs)

### Building 2 (k = 4)

In [6]:
# Tuning inputs for building 2: its training frame and clusters, k = 4,
# the 'svm' classifier, and cv = 10 (presumably the number of
# cross-validation folds — confirm against tune_room_in_cluster).
kwargs = dict(
    data=df_train_b2,
    clusters=clusters_b2,
    k=4,
    classifier='svm',
    cv=10,
)

# The call returns four values; they are displayed in the Results section.
(
    b2_sample_count,
    b2_room_count,
    b2_params,
    b2_scores,
) = tune_room_in_cluster(**kwargs)

## Results

### Building 0 (k = 10)

In [7]:
# Sample-count table for building 0 (first value returned by tune_room_in_cluster).
b0_sample_count

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
f0,54,,,296.0,20,,477.0,211.0,,
f1,95,3.0,1.0,365.0,157,10.0,492.0,233.0,,
f2,122,260.0,258.0,,396,382.0,8.0,,17.0,
f3,116,245.0,237.0,,372,407.0,,,,14.0


In [8]:
# Room-count table for building 0 (second value returned by tune_room_in_cluster).
b0_room_count

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
f0,5,,,24.0,4,,25.0,18.0,,
f1,6,2.0,1.0,25.0,10,2.0,32.0,19.0,,
f2,7,16.0,16.0,,23,23.0,5.0,,2.0,
f3,7,15.0,16.0,,25,26.0,,,,3.0


In [9]:
# Selected-parameter table for building 0; the saved output holds one {'C': value} dict per populated cell.
b0_params

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
f0,{'C': 0.05},,,{'C': 1},{'C': 0.01},,{'C': 1},{'C': 1},,
f1,{'C': 0.05},,,{'C': 0.5},{'C': 0.5},,{'C': 0.1},{'C': 1},,
f2,{'C': 0.01},{'C': 0.05},{'C': 0.1},,{'C': 0.5},{'C': 0.5},,,{'C': 0.001},
f3,{'C': 0.05},{'C': 0.5},{'C': 0.1},,{'C': 1},{'C': 1},,,,


In [10]:
# Score table for building 0; cells read "x% ± y%" (presumably mean ± spread over the cv runs — confirm).
# NOTE(review): the saved output contains a 'nan% ± nan%' cell — check how tune_room_in_cluster scores that cluster.
b0_scores

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
f0,87.33% ± 8.47%,,,85.63% ± 8.63%,95.0% ± 11.31%,,75.38% ± 10.38%,81.06% ± 3.87%,,
f1,92.56% ± 10.45%,,,79.91% ± 12.27%,88.75% ± 7.54%,,69.21% ± 15.45%,80.27% ± 5.42%,,
f2,94.36% ± 6.18%,67.69% ± 17.56%,71.92% ± 16.56%,,67.71% ± 18.12%,71.33% ± 15.04%,,,nan% ± nan%,
f3,89.85% ± 6.26%,73.05% ± 15.92%,71.67% ± 16.19%,,62.74% ± 14.0%,69.46% ± 12.88%,,,,


### Building 1 (k = 10)

In [11]:
# Sample-count table for building 1 (first value returned by tune_room_in_cluster).
b1_sample_count

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
f0,606.0,160.0,31.0,306.0,,117,,148,,
f1,219.0,499.0,127.0,6.0,20.0,364,11.0,106,91.0,41.0
f2,1.0,218.0,3.0,,340.0,125,48.0,534,127.0,
f3,,,,,249.0,76,10.0,526,50.0,


In [12]:
# Room-count table for building 1 (second value returned by tune_room_in_cluster).
b1_room_count

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
f0,44.0,8.0,4.0,9.0,,6,,14,,
f1,13.0,15.0,9.0,1.0,1.0,12,2.0,14,6.0,2.0
f2,1.0,13.0,1.0,,13.0,8,4.0,42,9.0,
f3,,,,,10.0,4,1.0,33,4.0,


In [13]:
# Selected-parameter table for building 1; the saved output holds one {'C': value} dict per populated cell.
b1_params

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
f0,{'C': 0.5},{'C': 0.05},{'C': 0.05},{'C': 0.5},,{'C': 0.5},,{'C': 0.5},,
f1,{'C': 1},{'C': 0.1},{'C': 0.1},,,{'C': 0.1},{'C': 0.001},{'C': 0.05},{'C': 0.01},{'C': 0.001}
f2,,{'C': 0.05},,,{'C': 1},{'C': 0.1},{'C': 0.01},{'C': 1},{'C': 0.05},
f3,,,,,{'C': 0.05},{'C': 0.005},,{'C': 0.5},{'C': 0.01},


In [14]:
# Score table for building 1; cells read "x% ± y%" (presumably mean ± spread over the cv runs — confirm).
# NOTE(review): the saved output contains 'nan% ± nan%' cells — check how tune_room_in_cluster scores those clusters.
b1_scores

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
f0,65.58% ± 9.18%,96.25% ± 4.32%,97.5% ± 5.66%,96.11% ± 3.04%,,89.77% ± 6.25%,,81.86% ± 12.07%,,
f1,85.41% ± 5.87%,72.18% ± 11.56%,67.63% ± 6.81%,,,87.75% ± 6.81%,nan% ± nan%,54.73% ± 4.95%,93.56% ± 6.42%,nan% ± nan%
f2,,89.5% ± 7.18%,,,90.88% ± 5.91%,85.51% ± 5.52%,98.0% ± 4.52%,89.9% ± 4.11%,98.46% ± 2.32%,
f3,,,,,87.48% ± 8.27%,83.57% ± 12.66%,,78.72% ± 5.97%,94.0% ± 6.91%,


### Building 2 (k = 4)

In [15]:
# Sample-count table for building 2 (first value returned by tune_room_in_cluster).
b2_sample_count

Unnamed: 0,0,1,2,3
f0,35,36,,1835.0
f1,533,21,168.0,1439.0
f2,1006,5,532.0,33.0
f3,1983,114,599.0,13.0
f4,650,42,410.0,


In [16]:
# Room-count table for building 2 (second value returned by tune_room_in_cluster).
b2_room_count

Unnamed: 0,0,1,2,3
f0,5,3,,67.0
f1,29,4,14.0,82.0
f2,59,2,24.0,18.0
f3,98,12,28.0,3.0
f4,49,7,27.0,


In [17]:
# Selected-parameter table for building 2; the saved output holds one {'C': value} dict per populated cell.
b2_params

Unnamed: 0,0,1,2,3
f0,{'C': 0.05},{'C': 0.005},,{'C': 0.5}
f1,{'C': 0.05},{'C': 0.005},{'C': 0.05},{'C': 0.1}
f2,{'C': 0.1},,{'C': 0.05},
f3,{'C': 1},{'C': 0.5},{'C': 0.1},
f4,{'C': 1},{'C': 0.01},{'C': 0.05},


In [18]:
# Score table for building 2; cells read "x% ± y%" (presumably mean ± spread over the cv runs — confirm).
b2_scores

Unnamed: 0,0,1,2,3
f0,90.0% ± 12.5%,97.5% ± 5.66%,,81.25% ± 4.62%
f1,75.03% ± 7.71%,91.67% ± 12.88%,85.77% ± 10.7%,81.23% ± 5.6%
f2,83.63% ± 6.93%,,87.65% ± 5.66%,
f3,75.2% ± 5.86%,91.36% ± 5.89%,78.66% ± 7.22%,
f4,59.08% ± 22.54%,91.5% ± 10.4%,49.02% ± 26.58%,


In [23]:
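# Optional visual check, left disabled: uncomment to plot the building 0 clusters
# with scatter3d (save=True presumably also writes the figure to disk — confirm).
# scatter3d(df_train_b0, clusters_b0, k=10, title='Building 0 (k = 10)', save=True)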

In [24]:
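# Optional visual check, left disabled: uncomment to plot the building 1 clusters
# with scatter3d (save=True presumably also writes the figure to disk — confirm).
# scatter3d(df_train_b1, clusters_b1, k=10, title='Building 1 (k = 10)', save=True)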

In [25]:
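# Optional visual check, left disabled: uncomment to plot the building 2 clusters
# with scatter3d (save=True presumably also writes the figure to disk — confirm).
# scatter3d(df_train_b2, clusters_b2, k=4, title='Building 2 (k = 4)', save=True)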