In [1]:
from app import Errors
import itertools
import pandas as pd
import os
from math import pi
from datetime import timedelta

In [2]:
from bokeh.models import ColumnDataSource
from bokeh.plotting import show, output_notebook, figure
from bokeh.palettes import Category20c
from bokeh import palettes
from bokeh import layouts
from bokeh.transform import cumsum
# Direct bokeh output to the notebook so that `show()` renders figures inline.
output_notebook()

In [28]:
# Load the error statistics from the application. The following cell reads the
# 'filename' and 'line_number' columns of this frame.
raw_data = Errors().error_heatmap()

In [29]:
# Build a new frame instead of aliasing `raw_data`, so the raw table is not
# mutated and this cell gives the same result every time it is run.
# 'location' is "<file name>:<line number>"; it is assembled column-wise, which
# also works when the frame is empty (a row-wise apply would not return a Series).
error_heatmap = (
    raw_data
    .assign(location=raw_data['filename'].map(os.path.basename)
            + ':' + raw_data['line_number'].astype(str))
    .drop(columns=['filename', 'line_number'])
)
error_heatmap

Unnamed: 0_level_0,Unnamed: 1_level_0,error_count,end_of_day,location
device,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PTL_RD_AT_003,2020-02-24,3,2020-02-25,Database.py:111
PTL_RD_AT_000,2020-02-24,1,2020-02-25,Database.py:104
PTL_RD_AT_000,2020-02-24,6,2020-02-25,Database.py:111
PTL_RD_AT_000,2020-02-24,5,2020-02-25,Database.py:113
PTL_RD_AT_000,2020-02-24,1,2020-02-25,Database.py:117
...,...,...,...,...
PTL_RD_ES_008,2020-03-13,2,2020-03-14,Database.py:111
PTL_RD_ES_009,2020-03-13,1,2020-03-14,Database.py:111
PTL_RD_ES_012,2020-03-13,2,2020-03-14,Database.py:111
PTL_RD_ES_006,2020-03-13,1,2020-03-14,watchdog.py:282


In [143]:
def color_palette(size, palette=None):
    """Return a list of ``size`` colors, repeating the palette when it runs out.

    Parameters
    ----------
    size : int
        Number of colors to return. Values <= 0 yield an empty list.
    palette : sequence of str, optional
        Colors to cycle through. Defaults to the concatenation of bokeh's
        ``Category20[20]``, ``Set3[12]`` and ``Category20b[20]`` palettes.

    Returns
    -------
    list of str
        ``size`` colors taken in palette order, starting over from the first
        color whenever the palette is exhausted.
    """
    if palette is None:
        palette = palettes.Category20[20] + palettes.Set3[12] + palettes.Category20b[20]
    # islice rejects negative counts, so clamp to keep "no colors" for size <= 0.
    return list(itertools.islice(itertools.cycle(palette), max(size, 0)))

In [144]:
# One row per distinct error location, each paired with a plotting color.
locations = pd.DataFrame(error_heatmap.location.unique(), columns=['location'])
# Ask the color_palette helper for exactly one color per location.
locations['colors'] = color_palette(len(locations))
locations

Unnamed: 0,location,colors
0,Database.py:111,#1f77b4
1,Database.py:104,#aec7e8
2,Database.py:113,#ff7f0e
3,Database.py:117,#ffbb78
4,base_events.py:1285,#2ca02c
5,logging_test.py:53,#98df8a
6,Serial.py:132,#d62728
7,watchdog.py:95,#ff9896
8,watchdog.py:129,#9467bd
9,Lighting.py:284,#c5b0d5


In [145]:
eh = error_heatmap.reset_index()
# Total number of errors per device and day. Only 'error_count' is aggregated:
# summing the whole frame would also try to add up 'end_of_day' and 'location',
# which fails or produces clashing columns when non-numeric columns are not
# dropped automatically.
errors_by_day = (
    eh.groupby(['device', 'date'])[['error_count']]
    .sum()
    .rename(columns=dict(error_count='errors_by_day'))
)
eh = eh.join(errors_by_day, on=['device', 'date'])
# Share of the day's errors (for that device) contributed by each location.
eh['error_count_normalized'] = eh.error_count / eh.errors_by_day
# Attach the plotting color of each location.
eh = eh.merge(locations, on=['location'])
# Index by device, date and the running row number, sorted for `.loc` lookups.
eh = eh.set_index(['device', 'date', eh.index]).sort_index()
eh

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,error_count,end_of_day,location,errors_by_day,error_count_normalized,colors
device,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
PTL_LT_AT_001,2020-03-09,94,17,2020-03-10,Serial.py:44,17,1.000000,#7f7f7f
PTL_RD_AT_000,2020-02-24,1,6,2020-02-25,Database.py:111,15,0.400000,#1f77b4
PTL_RD_AT_000,2020-02-24,74,1,2020-02-25,Database.py:104,15,0.066667,#aec7e8
PTL_RD_AT_000,2020-02-24,75,5,2020-02-25,Database.py:113,15,0.333333,#ff7f0e
PTL_RD_AT_000,2020-02-24,76,1,2020-02-25,Database.py:117,15,0.066667,#ffbb78
...,...,...,...,...,...,...,...,...
PTL_RD_ES_012,2020-03-12,126,2,2020-03-13,watchdog.py:261,75,0.026667,#dbdb8d
PTL_RD_ES_012,2020-03-12,146,2,2020-03-13,watchdog.py:301,75,0.026667,#9edae5
PTL_RD_ES_012,2020-03-13,73,2,2020-03-14,Database.py:111,12,0.166667,#1f77b4
PTL_RD_ES_012,2020-03-13,151,5,2020-03-14,Serial.py:155,12,0.416667,#ffffb3


In [164]:
# Distinct values of the second index level (the dates), in ascending order.
dates = eh.index.get_level_values(1).unique().sort_values()
# Common x-axis range for the heatmap figures: the covered dates padded by
# twelve hours on either side.
half_day = timedelta(hours=12)
x_range = (min(dates) - half_day, max(dates) + half_day)

In [165]:
# Maps each device name to its stacked-bar "heatmap" figure.
figures = {}

# Every bar is half a day wide.
bar_width = timedelta(days=1) / 2

for device in eh.index.get_level_values(0).unique():
    # All rows of this device; the remaining index levels are date and row number.
    device_data = eh.loc[device]

    p = figure(
        title=f"Error heatmap for {device}",
        plot_width=500,
        plot_height=300,
        x_axis_type='datetime',
        x_range=x_range,
        tools="hover",
        tooltips="@location: @error_count Errors",
        toolbar_location=None,
    )

    # One stack of bars per day: each location occupies the slice between the
    # running sum before it and the running sum including it.
    for date in device_data.index.get_level_values(0).unique():
        data_source = ColumnDataSource(device_data.loc[date])
        p.vbar(
            x=date,
            width=bar_width,
            bottom=cumsum('error_count_normalized', include_zero=True),
            top=cumsum('error_count_normalized'),
            fill_color='colors',
            source=data_source,
        )

    figures[device] = p

In [166]:
# Stack the per-device figures vertically and render them.
heatmap_column = layouts.column(*figures.values())
show(heatmap_column)

# Compute the time intervals during which each PTL device was online

In [164]:
from app import db
from db import DeadManPackage
import pandas as pd
from datetime import timedelta

In [225]:
# Select the (device, timestamp) pair of every DeadManPackage row.
# The query is only built here; it is executed by `query.all()` further down.
query = db.session.query(DeadManPackage.device, DeadManPackage.timestamp)

In [226]:
# Bar colors for the availability plot: colors[0] (green) is the default given
# to every interval, '#ff0000' (red) marks intervals with connected == 0.
colors = ['#00cc00', '#ff0000']

In [397]:
# A packet that arrives later than this after its predecessor marks a data loss.
DATA_LOSS_THRESHOLD = timedelta(seconds=90)

# Name the two queried columns explicitly instead of relying on pandas
# inferring them from the row objects.
data = pd.DataFrame(query.all(), columns=['device', 'timestamp'])
# compute the time difference between two consecutive rows of the same device
data['delay'] = data.groupby('device').timestamp.diff()
# The first row of each device has no 'delay' value.
# We assume this is the start of a connected interval.
# To do this we set the delay to be bigger than the threshold for data loss.
# Only the 'delay' column is filled, so missing values in other columns are
# never replaced by a timedelta.
data['delay'] = data['delay'].fillna(timedelta(minutes=2))
# 'on_time' is 1 if the packet arrived within the threshold of its predecessor
# and 0 if it follows a gap (or is the first packet of its device).
data['on_time'] = (data.delay <= DATA_LOSS_THRESHOLD).astype(int)
# Compare each flag with the flag of the NEXT row of the same device
# (periods=-1) to see where the connection status changes. The last row of
# each device has no successor and gets NaN.
data['keep_row'] = data.groupby('device').on_time.diff(periods=-1)
# We merge consecutive rows with the same status by keeping only rows where
# the value of 'keep_row' is not zero. NaN != 0, so the last row of each
# device is kept as well.
data = data[data.keep_row != 0.0]
# the timestamp column now marks the beginning of an interval in which
# the connection status did not change.
data = data.rename(columns=dict(timestamp='begin'))
# compute the duration of each interval (time until the next kept row of the
# same device; NaT for the last row of each device)
data['duration'] = data.groupby('device').begin.diff(periods=-1).abs()
data['end'] = data.begin + data.duration
# A kept row is the last one before the flag flips, so the interval that
# starts at it has the opposite flag: on_time == 0 begins a connected
# interval, on_time == 1 begins a gap.
data['connected'] = 1 - data.on_time
data = data.drop(columns=['delay', 'keep_row', 'on_time'])
data = data.sort_index()
# colors[0] for connected intervals, colors[1] for intervals without connection
data['color'] = colors[0]
data.loc[data.connected == 0, 'color'] = colors[1]

In [367]:
from datetime import datetime

In [375]:
# The last interval of each device is still open (its end lies in the future),
# so its 'end' value is NaT. One option is to close it at the last timestamp
# received for that device in the data set:
#end_of_last_interval = data_raw.groupby('device').timestamp.max()

In [398]:
# Rows with a missing value are discarded; this includes the last row of each
# device, whose 'duration' and 'end' are NaT. The result is then indexed by
# device and the original row number, and sorted.
# (Disabled alternative: fill the missing 'end' from `end_of_last_interval`
# and recompute 'duration' as end - begin.)
data = (
    data
    .dropna()
    .pipe(lambda frame: frame.set_index(['device', frame.index]))
    .sort_index()
)
data

Unnamed: 0_level_0,Unnamed: 1_level_0,begin,duration,end,connected,color
device,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PTL_RD_AT_000,80,2020-03-12 12:25:35.196265,0 days 03:03:33.452463,2020-03-12 15:29:08.648728,1,#00cc00
PTL_RD_AT_000,994,2020-03-12 15:29:08.648728,0 days 00:01:53.820413,2020-03-12 15:31:02.469141,0,#ff0000
PTL_RD_AT_000,996,2020-03-12 15:31:02.469141,0 days 09:28:45.484429,2020-03-13 00:59:47.953570,1,#00cc00
PTL_RD_AT_000,10298,2020-03-13 00:59:47.953570,0 days 00:02:14.093244,2020-03-13 01:02:02.046814,0,#ff0000
PTL_RD_AT_000,10327,2020-03-13 01:02:02.046814,0 days 11:41:09.469620,2020-03-13 12:43:11.516434,1,#00cc00
PTL_RD_AT_000,21385,2020-03-13 12:43:11.516434,0 days 00:02:45.886590,2020-03-13 12:45:57.403024,0,#ff0000
PTL_RD_AT_000,21419,2020-03-13 12:45:57.403024,0 days 12:13:16.397983,2020-03-14 00:59:13.801007,1,#00cc00
PTL_RD_AT_000,33790,2020-03-14 00:59:13.801007,0 days 00:02:47.392036,2020-03-14 01:02:01.193043,0,#ff0000
PTL_RD_AT_000,33818,2020-03-14 01:02:01.193043,1 days 19:08:48.584410,2020-03-15 20:10:49.777453,1,#00cc00
PTL_RD_AT_001,0,2020-03-12 11:01:27.834999,0 days 01:14:13.202586,2020-03-12 12:15:41.037585,1,#00cc00


In [399]:
# Plot the connected / disconnected intervals of one device as a single
# horizontal bar, colored per interval.
device = 'PTL_RD_ES_012'
device_data = data.loc[device]
data_source = ColumnDataSource(device_data)
# Time span covered by this device's intervals.
xrange = device_data.begin.min(), device_data.end.max()
# Pass the range computed above; `x_range` is a different variable that holds
# the date range of the error heatmap figures.
fig = figure(x_axis_type="datetime", x_range=xrange, y_range=(0, 0.25),
             plot_height=50, plot_width=800)
# One bar segment per interval, filling the full height of the y range.
fig.hbar(y=0.125, left='begin', right='end', height=0.25, color='color', source=data_source)
fig.yaxis.visible=False
fig.toolbar.logo = None
fig.toolbar_location = None
show(fig)