In [17]:
import os
from glob import glob
import pandas as pd
import re

import datetime
from pumping_station_enum import PUMPING_STATION_ENUM as ps
import numpy as np
from tqdm import tqdm

In [2]:
df = pd.read_pickle('../../output/no_interpolation_all.pkl', compression='gzip')

In [3]:
df = df[df.pumping_station == ps.PST232]

In [4]:
df['current_1'] = df.apply(lambda r: r.currents[0], axis=1)
df['current_2'] = df.apply(lambda r: r.currents[1], axis=1)
df.drop(columns=['currents', 'pumping_station'], inplace=True)

In [5]:
df

Unnamed: 0_level_0,water_level,outflow_level,current_tot,current_1,current_2
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-05-01 00:00:00.000,1.13,0.0,0.0,0.0,0.0
2020-05-01 00:01:31.268,1.15,0.0,0.0,0.0,0.0
2020-05-01 00:05:41.157,1.17,0.0,0.0,0.0,0.0
2020-05-01 00:11:02.430,1.19,0.0,0.0,0.0,0.0
2020-05-01 00:15:12.358,1.21,0.0,0.0,0.0,0.0
...,...,...,...,...,...
2021-08-31 23:55:44.561,0.72,0.0,0.0,0.0,0.0
2021-08-31 23:55:56.089,0.71,0.0,0.0,0.0,0.0
2021-08-31 23:56:07.602,0.72,0.0,0.0,0.0,0.0
2021-08-31 23:57:50.968,0.73,0.0,0.0,0.0,0.0


Current of 100 on one or both of the motors:

In [6]:
df_c = df[(df.current_1 == 100) | (df.current_2 == 100)]

In [7]:
df_c

Unnamed: 0_level_0,water_level,outflow_level,current_tot,current_1,current_2
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-14 09:33:46.615,0.55,51.12,200.0,100.0,100.0
2020-10-14 09:34:02.387,0.65,51.12,200.0,100.0,100.0
2020-10-14 09:34:17.753,0.74,0.00,200.0,100.0,100.0
2020-10-14 09:34:27.986,0.79,0.00,200.0,100.0,100.0
2020-10-14 09:34:38.235,0.87,0.00,200.0,100.0,100.0
...,...,...,...,...,...
2021-02-22 15:27:49.776,0.71,500.00,200.0,100.0,100.0
2021-02-22 15:28:35.418,0.72,500.00,200.0,100.0,100.0
2021-02-22 15:28:58.893,0.73,500.00,200.0,100.0,100.0
2021-02-22 15:29:21.386,0.74,500.00,200.0,100.0,100.0


In [8]:
df_cg = df_c['current_tot'].resample('D').count().rename('count')
df_cg = df_cg[df_cg > 0]
df_cg

time
2020-10-14      36
2020-12-09       2
2020-12-14     926
2020-12-15    1158
2020-12-16    2025
2020-12-17    2455
2020-12-18     983
2021-01-09      66
2021-02-22      29
2021-05-18       1
Name: count, dtype: int64

In [9]:
print(f'{len(df_c)} occurences spread out over {len(df_cg)} days')

7681 occurences spread out over 10 days


There one consecutive between 2020-12-14 and 2020-12-18 where p1 sensor fails most of the time, but sometimes works

In [10]:
df_problem_20201214_20201218 = df['2020-12-14':'2020-12-18']

# Calculate average outflow when p1 or p2 or (p1+p2) is active
## Create TURNED-ON cycles

In [40]:
# Correct period:
df_correct = df['2020-11-01':'2020-11-30']
df_correct[(df_correct.current_1 > 0) & (df_correct.current_2 == 0)]

Unnamed: 0_level_0,water_level,outflow_level,current_tot,current_1,current_2
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-11-01 01:19:25.855,1.25,0.00,7.4,7.4,0.0
2020-11-01 01:19:30.972,1.25,0.00,8.6,8.6,0.0
2020-11-01 01:19:32.579,1.22,55.44,8.6,8.6,0.0
2020-11-01 01:19:37.696,1.20,55.44,8.6,8.6,0.0
2020-11-01 01:19:42.672,1.17,55.44,8.6,8.6,0.0
...,...,...,...,...,...
2020-11-24 08:55:43.316,0.57,59.04,8.6,8.6,0.0
2020-11-24 08:55:54.829,0.51,59.04,8.6,8.6,0.0
2020-11-24 10:05:28.314,0.21,0.00,21.8,21.8,0.0
2020-11-24 10:05:29.609,0.21,59.04,21.8,21.8,0.0


In [48]:
current_tolerance = 0
current_change_threshold = 1

outflow_tolerance = 0
outflow_delay, outflow_change_threshold = 2, 10

# Fields added as column to DF
cycle_nrs = []  # Which cycle number. NaN means no cycle
cycle_steps = []  # How far are we progressed in current cycle
cycle_states = []  # [P1], [P2], or [P1+P2]
cycle_transitions = []  # [][P1], [][P2], [][P1+P2], [P1][P1+P2], [P2][P1+P2], [P1+P2][P1], [P1+P2][P2], [P1][], [P2][], [P1+P2][],
errors = []

cycle_count = 0
cycle_step = 0

def append(cycle_nr, cycle_step_l, cycle_state, cycle_transition):
    cycle_nrs.append(cycle_nr)
    cycle_steps.append(cycle_step_l)
    cycle_states.append(cycle_state)
    cycle_transitions.append(cycle_transition)
    errors.append(None)
    global cycle_step
    cycle_step += 1

def append_error(e):
    cycle_nrs.append(np.nan)
    cycle_steps.append(np.nan)
    cycle_states.append(None)
    cycle_transitions.append(None)
    errors.append(e)
    global cycle_step
    cycle_step += 1


def flowing_current(c):
    return c > current_tolerance


def flowing_outflow(o):
    return o > outflow_tolerance


for ix, (date, now) in tqdm(enumerate(df.iterrows()), total=len(df)):
    # Skip first 3 rows and last row  => We cannot compare to previous rows
    if (ix < 2) | (ix > len(df)-2):
        append(np.nan, np.nan, "", "")
    previous, previous_l2, previous_l3 = df.iloc[ix - 1], df.iloc[ix - 2], df.iloc[ix - 3]
    next, next_l2, next_l3 = df.iloc[ix + 1],df.iloc[ix + 2],df.iloc[ix + 3]

    # No outflow and motors are disabled
    if (now.current_tot == 0) & (now.outflow_level == 0):
        append(np.nan, np.nan, "", "")

    # Check for State Changes:
    # ==================================================================================
    # Transition:  [][P1], [][P2], [][P1+P2], [P1][P1+P2], [P2][P1+P2],  # CURRENT INCREASE
    if flowing_current(now.current_tot) & (now.current_tot - previous.current_tot > current_change_threshold):
        if flowing_outflow(now.outflow_level):
            append_error("Expected delay in outflow not found")
            continue
        if not flowing_outflow(next_l3.outflow_level):
            append_error("Outflow does not start after 3 samples")
            continue
        # Transition:  [][P1], [][P2], [][P1+P2]
        if not flowing_current(previous.current_tot):
            if not (flowing_current(now.current_1) | flowing_current(now.current_2)):
                append_error("Increase in current, but pumps are disabled")
                continue
            # Transition:  [][P1]
            if (flowing_current(now.current_1)) & (not flowing_current(now.current_2)):
                append(cycle_count, cycle_step, "[P1]","[][P1]")
            # Transition:  [][P2]
            elif (flowing_current(now.current_2)) & (not flowing_current(now.current_1)):
                append(cycle_count, cycle_step, "[P2]","[][P2]")
            # Transition:  [][P1+P2]
            elif (flowing_current(now.current_2)) & (flowing_current(now.current_1)):
                append(cycle_count, cycle_step, "[P1,P2]","[][P1,P2]")
            else:
                raise Exception(f"Impossible state with {date}")

        # Transition:  [P1][P1+P2], [P2][P1+P2]
        else:
            both_draw_current = flowing_current(now.current_1) & flowing_current(previous.current_2)
            if not both_draw_current:
                append_error("Current increased but a second pump is not activated")
                continue
            # Transition:  [P1][P1+P2]
            if (flowing_current(previous.current_1)) & (not flowing_current(previous.current_2)):
                append(cycle_count, cycle_step, "[P1,P2]","[P1][P1,P2]")
            # Transition:  [P2][P1+P2]
            elif (flowing_current(previous.current_2)) & (not flowing_current(previous.current_1)):
                append(cycle_count, cycle_step, "[P1,P2]","[P2][P1,P2]")
            else:
                raise Exception(f"Impossible state with {date}")
    # Transition:  [P1+P2][P1], [P1+P2][P2]                     # CURRENT DECREASE
    elif flowing_current(now.current_tot) & (previous.current_tot - now.current_tot > current_change_threshold):
        if flowing_current(now.current_1) & flowing_current(now.current_2):
            append_error("Current decreased but both pumps still in operation")
            continue
        # Transition:  [P1+P2][P1]
        if (flowing_current(now.current_1)) & (not flowing_current(now.current_2)):
            append(cycle_count, cycle_step, "[P1]","[P1,P2][P1]")
        if (flowing_current(now.current_2)) & (not flowing_current(now.current_1)):
            append(cycle_count, cycle_step, "[P2]","[P1,P2][P2]")
        else:
            raise Exception(f"Impossible state with {date}")
    # No Transition: Stable on P1, P2 or P1,P2
    elif flowing_current(now.current_tot):  # CURRENT STABLE ON
        if not abs(previous.current_tot - now.current_tot) <= current_change_threshold:
            append_error("Current is fluctuating")
            continue
        if not flowing_outflow(now.outflow_level):
            append_error("Pump(s) is running dry")
            continue
        if not next.water_level < now.water_level:
            append_error('Water level does not decrease while pumps are on for a while')
            continue
        if (flowing_current(now.current_1)) & (not flowing_current(now.current_2)):
            append(cycle_count, cycle_step, "[P1]","")
        elif (flowing_current(now.current_2)) & (not flowing_current(now.current_1)):
            append(cycle_count, cycle_step, "[P2]","")
        elif (flowing_current(now.current_2)) & (flowing_current(now.current_1)):
            append(cycle_count, cycle_step, "[P1,P2]","")
        else:
            raise Exception(f"Impossible state with {date}")
    # Transition:  [P1][], [P2][], [P1+P2][]
    elif not flowing_current(now.current_tot):
        # Transition:  [P1][], [P2][], [P1+P2][]
        if flowing_outflow(now.outflow_level):
            if not flowing_current(previous_l3.current_tot):
                append_error("Pump is emitting water but operation stopped long time ago")
                continue
            # Transition:  [P1][]
            if (flowing_current(previous_l3.current_1)) & (not flowing_current(previous_l3.current_2)):
                append(cycle_count, cycle_step, "[P1]","[P1][]")
            # Transition:  [P2][]
            elif (flowing_current(previous_l3.current_2)) & (not flowing_current(previous_l3.current_1)):
                append(cycle_count, cycle_step, "[P2]","[P2][]")
            # Transition:  [P1+P2][]
            elif (flowing_current(previous_l3.current_1)) & (flowing_current(previous_l3.current_2)):
                append(cycle_count, cycle_step, "[P1,P2]","[P1,P2][]")
            else:
                raise Exception(f"Impossible state with {date}")
        else:
            if flowing_outflow(previous.outflow_level):
                cycle_count += 1
                append(np.nan, np.nan, None, None)
            else:
                if not next.water_level > now.water_level:
                    append_error("Water level should be rising")
                    continue
                append(np.nan, np.nan, None, None)
    else:
        raise Exception(f"Impossible state with {date}")


  1%|          | 7527/1407518 [00:01<04:27, 5242.25it/s]


Exception: Impossible state with 2020-05-04 17:05:27.642000

100%|█████████▉| 1407515/1407518 [04:35<00:00, 5100.63it/s]


IndexError: single positional indexer is out-of-bounds

In [None]:
df_cycles = pd.DataFrame()