In [1]:
import itertools
import pandas as pd

# INPUT: CSV file containing the sensor readings together with the ground truth
training_data_filename = 'data1.csv'

# Sensor name -> tuple of location column(s) that the sensor is associated with.
sensor_locs = dict(
    # NOTE: sensors are assumed limited to their closest room (i.e., not adjacent rooms)
    motion_sensor1=('r1',),
    motion_sensor2=('r14',),
    motion_sensor3=('r19',),
    motion_sensor4=('r28',),
    motion_sensor5=('r29',),
    motion_sensor6=('r32',),
    camera1=('r3',),
    camera2=('r21',),
    camera3=('r25',),
    camera4=('r34',),

    # NOTE: doors are two-way, so each door sensor lists both areas it connects
    door_sensor1=('r2', 'r3'),
    door_sensor2=('c1', 'c2'),
    door_sensor3=('r20', 'r26'),
    door_sensor4=('c1', 'r28'),
)

In [2]:
# Load the whole dataset; the first CSV column is used as the row index
df = pd.read_csv(training_data_filename, header=[0], index_col=[0])

# The first 17 columns are the features; every remaining column is truth
features_df = df.iloc[:, :17]
targets_df = df.iloc[:, 17:]
features_names = features_df.columns
targets_names = targets_df.columns

# Sanity-check the shape of the full frame and of each split
for frame in (df, features_df, targets_df):
    print(frame.shape)
df.head()

(2400, 54)
(2400, 17)
(2400, 37)


Unnamed: 0,motion_sensor1,motion_sensor2,motion_sensor3,motion_sensor4,motion_sensor5,motion_sensor6,camera1,camera2,camera3,camera4,...,r28,r29,r30,r31,r32,r33,r34,c1,c2,outside
0,no motion,no motion,no motion,no motion,motion,motion,0,0,0,0,...,0,0,0,0,0,0,0,0,0,40
1,no motion,motion,no motion,no motion,motion,no motion,0,0,0,0,...,0,0,0,0,0,0,0,0,0,40
2,no motion,no motion,no motion,motion,no motion,no motion,0,0,0,0,...,0,0,0,0,0,0,0,1,0,39
3,no motion,motion,no motion,no motion,no motion,no motion,0,0,0,0,...,0,0,0,0,0,0,0,0,0,37
4,no motion,motion,no motion,no motion,no motion,no motion,0,0,0,0,...,0,0,0,0,0,0,0,1,0,38


In [3]:
def get_sensor_df(df: pd.DataFrame, sensor_name: str, locs=None) -> pd.DataFrame:
    """Select a sensor's column together with its associated location column(s).

    Args:
        df: Frame containing both the sensor column and the location columns.
        sensor_name: Name of the sensor column; must be a key of the mapping.
        locs: Optional mapping of sensor name -> iterable of location column
            names. Defaults to the notebook-level ``sensor_locs``.

    Returns:
        A new frame whose first column is ``sensor_name`` followed by the
        sensor's location column(s), in the order given by the mapping.

    Raises:
        KeyError: If ``sensor_name`` is not in the mapping, or if any selected
            column is missing from ``df``.
    """
    if locs is None:
        locs = sensor_locs
    # Use a list, not a tuple: inside ``.loc`` a tuple is read as a single
    # multi-level key when the columns are a MultiIndex, whereas a list always
    # means "these several columns".
    cols = [sensor_name, *locs[sensor_name]]
    return df.loc[:, cols]

# Preview one sensor of each kind next to its location column(s)
for example_sensor in ('motion_sensor1', 'door_sensor1', 'camera1'):
    print(get_sensor_df(df, example_sensor).head())

  motion_sensor1  r1
0      no motion   0
1      no motion   0
2      no motion   0
3      no motion   0
4      no motion   0
   door_sensor1  r2  r3
0             0   0   0
1             0   0   0
2             0   0   0
3             0   0   0
4             0   0   0
   camera1  r3
0        0   0
1        0   0
2        0   0
3        0   0
4        0   0


In [13]:
def make_counts_table(df: pd.DataFrame, normalize: bool = True) -> pd.DataFrame:
    """Tabulate how often each distinct row of ``df`` occurs, rarest first.

    Args:
        df: Frame whose unique row combinations are tallied.
        normalize: When True (the default), the ``'count'`` column holds each
            combination's share of all rows (values sum to 1), not a raw
            tally. Pass False to get the actual number of occurrences.

    Returns:
        A frame with one row per distinct combination: the original columns
        followed by a ``'count'`` column, sorted in ascending order of
        frequency.
    """
    frequencies = df.value_counts(ascending=True, normalize=normalize)
    # Keep the column name 'count' in both modes so callers see a stable schema.
    return frequencies.reset_index(name='count')

def make_confusion_matrix(s1: pd.Series, s2: pd.Series) -> pd.DataFrame:
    """Cross-tabulate two series, including row and column totals.

    ``s1`` supplies the rows and ``s2`` the columns; the margins are added by
    ``pd.crosstab`` under the label ``'All'``.
    """
    table = pd.crosstab(index=s1, columns=s2, margins=True)
    return table

# Pair motion_sensor1 with its room and recode the room value as 'on' (> 0) or 'off'
first_pair_df = get_sensor_df(df, 'motion_sensor1').assign(
    r1=lambda frame: frame['r1'].gt(0).map({True: 'on', False: 'off'})
)
counts_table = make_counts_table(first_pair_df)

print(counts_table)
# Show only the combination where the room is 'on' but the sensor reported no motion
counts_table.query('motion_sensor1 == "no motion" & r1 == "on"')
# Alternative view of the same pair:
# print(make_confusion_matrix(first_pair_df.iloc[:, 0], first_pair_df.iloc[:, 1]))

  motion_sensor1   r1     count
0      no motion   on  0.036250
1         motion  off  0.045000
2      no motion  off  0.237917
3         motion   on  0.680833


Unnamed: 0,motion_sensor1,r1,count
0,no motion,on,0.03625
