In [None]:
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt

In [None]:
# Load the raw sensor export.
csv_path = "data/sample_temperature_data_for_coding_challenge.csv"
df = pd.read_csv(csv_path, sep=',')

# Parse the textual 'datetime' column once.  The parsed values are kept both
# as a regular 'date' column (later cells use its .dt accessor) and as the
# frame's index; the original 'datetime' text column is left untouched.
time = pd.to_datetime(df['datetime'])
df = df.assign(date=time).set_index('date', drop=False)

In [None]:
# Hourly anomaly detection on heating vs. cooling temperature.
#
# The anomaly is detected when the temperature difference of the heating temp
# and cooling temp is below a threshold at a certain hour.
# This means that either the cooler is overheated or the heater is not
# functioning.

HEATING = "heating_temperature"
COOLING = "cooling_temperature"

# threshold: hourly means closer together than this are flagged
thold = 5


def flag_hourly_anomalies(readings, threshold):
    """Return the heating/cooling rows of ``readings`` with an 'anomaly' flag.

    Readings are bucketed by (calendar day, hour of day) taken from the
    'date' column.  Within each bucket the mean heating temperature and the
    mean cooling temperature are computed; when both exist and their
    absolute difference is below ``threshold``, every heating and cooling
    row of that bucket gets ``anomaly == True``.  Buckets that lack either
    kind of reading (or whose mean is NaN) are never flagged.

    Parameters
    ----------
    readings : pandas.DataFrame
        Must provide the columns 'property_name', 'date' (datetime dtype),
        'temperature' and 'datetime'.  The frame is not modified.
    threshold : float
        Minimum gap between the two hourly means that counts as normal.

    Returns
    -------
    pandas.DataFrame
        Cooling rows followed by heating rows, sorted by the 'datetime'
        column, with an added boolean 'anomaly' column.  Rows whose
        'property_name' is neither heating nor cooling are left out.
    """
    is_heat = (readings['property_name'] == HEATING).to_numpy()
    is_cool = (readings['property_name'] == COOLING).to_numpy()

    # One bucket per (day, hour); plain arrays so grouping is positional.
    stamps = readings['date']
    buckets = [stamps.dt.date.to_numpy(), stamps.dt.hour.to_numpy()]

    # Work on a positional index: the frame's own index may hold duplicate
    # timestamps, and only row positions matter here.
    temps = readings['temperature'].reset_index(drop=True)

    # Mask out the other sensor, then broadcast each bucket's mean back to
    # every row of the bucket.  A bucket without that sensor yields NaN.
    heat_mean = temps.where(is_heat).groupby(buckets).transform('mean')
    cool_mean = temps.where(is_cool).groupby(buckets).transform('mean')

    # NaN < threshold is False, so incomplete buckets stay un-flagged.
    too_close = ((heat_mean - cool_mean).abs() < threshold).to_numpy()
    flagged = readings.assign(anomaly=too_close)

    # Stable sort keeps a deterministic order for rows that share the same
    # 'datetime' text (cooling first, then heating).
    combined = pd.concat([flagged[is_cool], flagged[is_heat]])
    return combined.sort_values(by='datetime', kind='mergesort')


# recover dataframe and sort
df_recovered = flag_hourly_anomalies(df, thold)

# save to csv
# NOTE(review): the index is written as well; if it carries the same name as
# a column, the CSV header will contain that name twice -- confirm intended.
df_recovered.to_csv('anomaly_detected_dataset.csv')

In [None]:

# plot data points, RED: anomaly, Blue: normal

# Epoch-seconds column derived from the 'datetime' text (anything after the
# first "." is cut off before parsing).  It is still written to the frame so
# df_recovered keeps the same columns; the plot itself uses the parsed 'date'
# column so the x-axis shows readable dates instead of raw seconds.
X = df_recovered['datetime']
df_recovered['timestamp'] = [
    datetime.fromisoformat(x.split(".")[0]).timestamp() for x in X
]

df_heat = df_recovered[df_recovered['property_name']=="heating_temperature"]
df_cool = df_recovered[df_recovered['property_name']=="cooling_temperature"]

fig, ax = plt.subplots(2,1,figsize=(15,4))
panels = [
    (ax[0], df_heat, "Heating temperature"),
    (ax[1], df_cool, "Cooling temperature"),
]
for axis, frame, title in panels:
    # One scatter call per panel: a per-point colour list replaces the
    # former one-artist-per-row loop.
    colors = ["red" if flag else "blue" for flag in frame["anomaly"]]
    axis.scatter(frame["date"], frame["temperature"], c=colors)
    axis.set_title(title + " (red: anomaly, blue: normal)")
    axis.set_ylabel("temperature")
ax[1].set_xlabel("time")
fig.tight_layout()
plt.show()