In [None]:
import json

# Sample dictionary: maps a datapoint name to its class triple
# (substance, component, subsystem). None marks a class that is not set.
classification_dict = {
    'FanSUPSpeedAct': {'substance': 'speed', 'component': 'Fan', 'subsystem': None},
    'FanSUPSpeedSet': {'substance': 'speed', 'component': 'Fan', 'subsystem': None},
    'fAHUTempEHAADS': {'substance': 'temp', 'component': None, 'subsystem': None},
    'fAHUTempODAADS': {'substance': 'temp', 'component': None, 'subsystem': 'ODA'},
    'fAHUTempSUPADS': {'substance': 'temp', 'component': None, 'subsystem': 'SUP'},
    'fAHUTempSUPSetADS': {'substance': 'temp', 'component': None, 'subsystem': 'SUP'},
    'fAHUPHValveActADS': {'substance': 'valve', 'component': 'heater', 'subsystem': 'ODA'},
}




In [None]:
import json
from collections import Counter

# Aggregate per-row class predictions into one majority-vote triple
# (substance, component, subsystem) per datapoint and save the result as JSON.
dataset = "AHU_prin_summer_2023_stanscaler_RULES"
dataset_dir = os.path.join('./SimTSC/datasets/EBC', dataset)
df_test = pd.read_csv(
    os.path.join(dataset_dir, dataset+'_SEPTEST.tsv'),
    sep='\t',
    header=None,
)
# Keep the first two columns; only the first one is used for grouping below.
y_test = df_test.iloc[:, 0:2]

# JSON file holding all class predictions, keyed by 'substance', 'component'
# and 'subsystem'. The path is relative to the current working directory.
read_filename = os.path.join(dataset+'_classes_dict.json')
with open(read_filename, "r", encoding="utf-8") as f:
    classes_dict = json.load(f)

# For every distinct value in the first column, collect the row labels of
# y_test that carry that value (in order of first appearance).
point_column = y_test.iloc[:, 0]
category = point_column.unique()
category_indices = {
    point: list(y_test.loc[point_column == point].index)
    for point in category
}

# Point-wise classes: one (substance, component, subsystem) triple per datapoint.
pointwise_dict = dict()
class_names = ('substance', 'component', 'subsystem')

for point, indices in category_indices.items():
    winners = {}
    for class_name in class_names:
        # The y_test row labels are used directly as positions in the
        # prediction sequences -- assumes both are aligned row-for-row
        # (TODO confirm against the code that writes the JSON file).
        votes = Counter(classes_dict[class_name][index] for index in indices)
        # most_common(1) yields [(value, count)]; ties resolve to the value
        # that was encountered first.
        winners[class_name] = votes.most_common(1)[0][0]
    pointwise_dict[point] = winners

for point, classes in pointwise_dict.items():
    print(f"Datapoint: {point}, Classes: {classes}")

# Persist the majority-vote result next to the input predictions file.
save_filename = os.path.join(dataset+'_pointwise_preds_dict.json')
with open(save_filename, "w", encoding="utf-8") as f:
    json.dump(pointwise_dict, f)

print(f"Dictionary saved to {save_filename}")


In [None]:
# Calculate 'soft' and 'hard' classification accuracy for rules

import json

# Compare the rule-based predictions stored in the JSON file against the
# ground-truth label files and report two scores:
#   * soft accuracy: each sample earns 1/3 per correctly predicted class
#     (substance, component, subsystem);
#   * hard accuracy: a sample counts only when all three classes are correct.
dataset = "AHU_prin_summer_2023_stanscaler_RULES"
dataset_dir = './SimTSC/datasets/EBC'
dataset_dir = os.path.join(dataset_dir, dataset)

# Ground truth: the first column of each headerless TSV file.
subs_test = pd.read_csv(os.path.join(dataset_dir, dataset+'_TEST_ed.tsv'), sep='\t', header=None)
subs_test = subs_test.iloc[:, 0].tolist()
comp_test = pd.read_csv(os.path.join(dataset_dir, dataset+'_TEST_comp.tsv'), sep='\t', header=None)
comp_test = comp_test.iloc[:, 0].tolist()
subsys_test = pd.read_csv(os.path.join(dataset_dir, dataset+'_TEST_subsys.tsv'), sep='\t', header=None)
subsys_test = subsys_test.iloc[:, 0].tolist()

# JSON file holding all class predictions (substance, component, subsystem).
read_filename = os.path.join(dataset+'_classes_dict.json')
with open(read_filename, "r", encoding="utf-8") as f:
    pred_classes_dict = json.load(f)


def _matching_indices(predicted, truth):
    """Return the positions at which prediction and truth agree.

    Values are compared by their string form. Iteration stops at the shorter
    of the two sequences, so surplus entries on either side never match.
    """
    return [i for i, (a, b) in enumerate(zip(predicted, truth)) if str(a) == str(b)]


# Use the predictions loaded in THIS cell (pred_classes_dict) rather than a
# variable left over from a previously executed cell, so the cell also works
# on a fresh kernel and always evaluates the file it just read.
subs_matches = _matching_indices(pred_classes_dict['substance'], subs_test)
print(len(subs_matches))
comp_matches = _matching_indices(pred_classes_dict['component'], comp_test)
print(len(comp_matches))
subsys_matches = _matching_indices(pred_classes_dict['subsystem'], subsys_test)
print(len(subsys_matches))

# print non-matching pairs (true value, prediction) for Component class
# not_comp_matches = [x for x in list(range(0,len(comp_test))) if x not in comp_matches]
# for idx in not_comp_matches:
#   print(f"truth: {comp_test[idx]} & prediction: {pred_classes_dict['component'][idx]}")

# Sets give constant-time membership tests; testing against the lists made
# the loop below quadratic in the number of samples.
subs_hit = set(subs_matches)
comp_hit = set(comp_matches)
subsys_hit = set(subsys_matches)

total_len = len(comp_test)
one_third = float(1/3)
soft_counts = 0
hard_counts = 0
for i in range(0, total_len):
    subs_ok = i in subs_hit
    comp_ok = i in comp_hit
    subsys_ok = i in subsys_hit
    soft_counts += one_third*subs_ok + one_third*comp_ok + one_third*subsys_ok
    if subs_ok and comp_ok and subsys_ok:
        hard_counts += 1

soft_acc = (soft_counts / total_len)*100
hard_acc = (hard_counts / total_len)*100
print(f"Soft accuracy: {soft_acc}%")
print(f"Hard accuracy: {hard_acc}%")


### 1NN Classifier (Baseline) 

In [None]:
# 1NN classifier (baseline)

# loading library
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Read dataset: three headerless TSV splits (train / test / validation).
dataset_dir = './SimTSC/datasets/EBC'
dataset_name = 'AHU_principal_Winter_2023_unnorm'
dataset_dir = os.path.join(dataset_dir, dataset_name)
df_train = pd.read_csv(os.path.join(dataset_dir, dataset_name+'_TRAIN_RAW.tsv'), sep='\t', header=None)
df_test = pd.read_csv(os.path.join(dataset_dir, dataset_name+'_TEST_RAW.tsv'), sep='\t', header=None)
df_val = pd.read_csv(os.path.join(dataset_dir, dataset_name+'_VAL_RAW.tsv'), sep='\t', header=None)

# Column 0 is used as the label and is cast to string.
# The `np.str` alias was deprecated in NumPy 1.20 and removed in 1.24; the
# builtin `str` is exactly what the alias pointed to, so behaviour is unchanged
# on old NumPy and the cell no longer raises AttributeError on current NumPy.
Y_train = df_train[df_train.columns[0]].astype(str)
Y_test = df_test[df_test.columns[0]].astype(str)
Y_val = df_val[df_val.columns[0]].astype(str)

Y_train = Y_train.values
Y_test = Y_test.values
Y_val = Y_val.values
# All labels stacked in train, test, validation order (not used by the
# classifier fitted below).
Y = np.concatenate((Y_train, Y_test, Y_val), axis=0)

# Every remaining column is used as a float32 feature.
X_train = df_train.drop(columns=[0]).astype(np.float32).values
X_test = df_test.drop(columns=[0]).astype(np.float32).values
X_val = df_val.drop(columns=[0]).astype(np.float32).values

# instantiate learning model: nearest-neighbour classifier with k = 1
knn = KNeighborsClassifier(n_neighbors=1)

# fitting the model on the training split only
knn.fit(X_train, Y_train)

# predict the response for the test split
pred = knn.predict(X_test)

# evaluate accuracy on the test split, reported as a percentage
print('\nThe accuracy of the classifier is {}%'.format(accuracy_score(Y_test, pred)*100))