In [2]:
# IMPORTS
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
import json
import statistics as stat
from collections import deque

In [3]:
# LOAD THE PRESSURE LOG (CSV) INTO A PANDAS DATAFRAME
filename = 'data/MEBL3/MEBL3_Pressure-data-2024-06-28 09_40_08.csv'
# The 'Time' column is parsed into datetimes while reading; missing entries are then replaced by 0.
pressure_df = (
    pd.read_csv(filepath_or_buffer=filename, parse_dates=['Time'])
    .fillna(value=0)
)
print(f"{filename} : file read into a pandas dataframe.")

# Bare expression so the notebook renders the frame as the cell output.
pressure_df

data/MEBL3/MEBL3_Pressure-data-2024-06-28 09_40_08.csv : file read into a pandas dataframe.


Unnamed: 0,Time,Chamber Pressure,Column Pressure
0,2024-06-14 01:55:29,1.850000e-07,5.550000e-10
1,2024-06-14 01:55:30,1.840000e-07,5.670000e-10
2,2024-06-14 01:55:31,1.830000e-07,5.540000e-10
3,2024-06-14 01:55:32,1.800000e-07,5.610000e-10
4,2024-06-14 01:55:33,1.810000e-07,5.490000e-10
...,...,...,...
1059524,2024-06-27 23:59:54,1.650000e-07,3.380000e-10
1059525,2024-06-27 23:59:55,1.630000e-07,3.370000e-10
1059526,2024-06-27 23:59:56,1.640000e-07,3.410000e-10
1059527,2024-06-27 23:59:57,1.650000e-07,3.380000e-10


In [4]:
# CONVERT TO NUMPY ARRAY
# Each row of the result holds [Time, Chamber Pressure, Column Pressure] for one reading.
# The echoed output shows dtype=object: every row stores a Timestamp alongside the two floats.
pressure_arr = pressure_df.to_numpy()
# Bare expression so the notebook displays the array.
pressure_arr

array([[Timestamp('2024-06-14 01:55:29'), 1.85e-07, 5.55e-10],
       [Timestamp('2024-06-14 01:55:30'), 1.84e-07, 5.67e-10],
       [Timestamp('2024-06-14 01:55:31'), 1.83e-07, 5.54e-10],
       ...,
       [Timestamp('2024-06-27 23:59:56'), 1.64e-07, 3.41e-10],
       [Timestamp('2024-06-27 23:59:57'), 1.65e-07, 3.38e-10],
       [Timestamp('2024-06-27 23:59:58'), 1.64e-07, 3.4e-10]],
      dtype=object)

In [5]:
# Small 2-row toy array used to prototype the rolling window:
# row 0 is simply the positions 0..9, row 1 holds the sample values fed to the window.
x = np.array([
    list(range(10)),
    [3, 2, 5, 1, 8, 4, 0, 9, 6, 7],
])
x

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [3, 2, 5, 1, 8, 4, 0, 9, 6, 7]])

In [6]:
# ROLLING-WINDOW DEMO ON THE TOY ARRAY
## Slide a fixed-size window over the second row of x and print, for every full window,
## its contents followed by its median.
## The statistic was previously stored and printed as "local_mean" although it is computed
## with stat.median; it is now named for what it actually is.
window_size = 5
queue = deque(maxlen=window_size)  # once full, appending drops the oldest value automatically
for value in x[1]:
    queue.append(value)
    if len(queue) < window_size:
        continue  # window not full yet, nothing to report
    print(list(queue))  # list(...) keeps the printed form a plain [a, b, c, d, e]
    local_median = stat.median(queue)
    print(f"{local_median=}")

[3, 2, 5, 1, 8]
local_mean=3
[2, 5, 1, 8, 4]
local_mean=4
[5, 1, 8, 4, 0]
local_mean=4
[1, 8, 4, 0, 9]
local_mean=4
[8, 4, 0, 9, 6]
local_mean=6
[4, 0, 9, 6, 7]
local_mean=6


In [7]:
# PRESSURE SPIKE SCRIPT 1
## Disabled: the whole script is wrapped in a triple-quoted string because it is ~10x slower than script 2.
## NOTE: the string is still an expression and is the last statement of the cell, so Jupyter echoes its text as the cell output.
## Approach: keep the 5 most recent readings (current one included) in a list, take np.mean of them,
## and record the timestamp whenever the current reading exceeds 2x that mean; results go to one JSON file per gauge.
'''
for type_idx, type in enumerate(["chamber", "column"]):
    pressure_deque = []
    spike_times = []
    for row_idx in range(len(pressure_arr)): #O(n)
        pressure_value = pressure_arr[row_idx][type_idx+1]
        pressure_deque.append(pressure_value)
        if len(pressure_deque) < 5:
            continue
        elif len(pressure_deque) > 5:
            pressure_deque.pop(0) # don't know runtime of list.pop(), but size of pressure_queue is at most 6 so negligible
        local_mean = np.mean(pressure_deque) # np.mean > stat.mean for speed but not precision
        if pressure_value > 2*local_mean:
            spike_time = str(pressure_arr[row_idx][0])
            print(f"{spike_time=} : {pressure_value}")
            print(f"{pressure_deque=}")
            spike_times.append(spike_time)
    with open(f"data/pressure_spike_times/{type}_spike_times.json", "w") as outfile: 
        json.dump(spike_times, outfile)
'''
# 5 seconds (presumably the measured run time of this script when it was enabled -- TODO confirm)

'\nfor type_idx, type in enumerate(["chamber", "column"]):\n    pressure_deque = []\n    spike_times = []\n    for row_idx in range(len(pressure_arr)): #O(n)\n        pressure_value = pressure_arr[row_idx][type_idx+1]\n        pressure_deque.append(pressure_value)\n        if len(pressure_deque) < 5:\n            continue\n        elif len(pressure_deque) > 5:\n            pressure_deque.pop(0) # don\'t know runtime of list.pop(), but size of pressure_queue is at most 6 so negligible\n        local_mean = np.mean(pressure_deque) # np.mean > stat.mean for speed but not precision\n        if pressure_value > 2*local_mean:\n            spike_time = str(pressure_arr[row_idx][0])\n            print(f"{spike_time=} : {pressure_value}")\n            print(f"{pressure_deque=}")\n            spike_times.append(spike_time)\n    with open(f"data/pressure_spike_times/{type}_spike_times.json", "w") as outfile: \n        json.dump(spike_times, outfile)\n'

In [15]:
# PRESSURE SPIKE SCRIPT 2
## difference from script 1: keeps a running tally of the window sum and derives the mean from it (no use of np.mean())
## A reading is flagged as a spike when it exceeds spike_factor times the mean of the
## time_range readings immediately before it. The timestamps of all spikes are written
## to one JSON file per gauge ("chamber" and "column").
time_range = 5    # window length in readings (the rows shown above are 1 second apart)
spike_factor = 2  # a reading is a spike when it exceeds spike_factor * local_mean
# Column 0 of pressure_arr is the timestamp, so the pressure columns start at index 1.
# The loop variable is named pressure_type so that it does not shadow the builtin `type`.
for col_idx, pressure_type in enumerate(["chamber", "column"], start=1):
    spike_times = []
    pressure_deque = deque(maxlen=time_range)  # the time_range readings preceding the current one
    sum_pressure = 0  # running sum of the values currently held in pressure_deque
    # Visit every row: each reading is compared only with the readings before it, so there
    # is no look-ahead and the final row must not be skipped.
    for row_idx in range(len(pressure_arr)):
        pressure_value = pressure_arr[row_idx][col_idx]
        if len(pressure_deque) < time_range:
            # Warm-up: the first time_range readings only fill the window and are never tested.
            sum_pressure += pressure_value
            pressure_deque.append(pressure_value)
            continue
        # The window is full from here on, so the mean is always defined.
        local_mean = sum_pressure / time_range
        if pressure_value > spike_factor * local_mean:
            spike_time = str(pressure_arr[row_idx][0])
            print(f"{spike_time=} : {pressure_deque=}")
            print(f"    {pressure_value=}, {local_mean=}")
            spike_times.append(spike_time)
        # Slide the window forward by one reading. The current reading enters the window even
        # when it was flagged, so a spike raises the mean used for the following time_range readings.
        sum_pressure -= pressure_deque.popleft()
        sum_pressure += pressure_value
        pressure_deque.append(pressure_value)
    with open(f"data/pressure_spike_times/{pressure_type}_spike_times.json", "w") as outfile:
        json.dump(spike_times, outfile)

### QUESTION: are pressure spikes defined by just the increase, or will there also be a sharp decrease afterwards?
### May need to make the rolling-window threshold computation apply to the middle value in the deque
### Also, 0.6 seconds ?!?!

spike_time='2024-06-15 07:02:55' : pressure_deque=deque([1.83e-07, 1.78e-07, 1.81e-07, 1.84e-07, 1.82e-07], maxlen=5)
    pressure_value=7e-07, local_mean=1.8160000000000184e-07
spike_time='2024-06-17 07:03:28' : pressure_deque=deque([4.75e-10, 4.77e-10, 4.81e-10, 4.74e-10, 4.73e-10], maxlen=5)
    pressure_value=1.04e-08, local_mean=4.759999999999911e-10
spike_time='2024-06-17 16:45:07' : pressure_deque=deque([4.79e-10, 4.85e-10, 4.83e-10, 4.75e-10, 4.79e-10], maxlen=5)
    pressure_value=2.68e-09, local_mean=4.801999999999869e-10
spike_time='2024-06-22 18:50:10' : pressure_deque=deque([4.8e-10, 4.79e-10, 4.75e-10, 4.8e-10, 4.77e-10], maxlen=5)
    pressure_value=4.37e-09, local_mean=4.781999999999483e-10
spike_time='2024-06-23 06:34:54' : pressure_deque=deque([4.75e-10, 4.75e-10, 4.73e-10, 4.81e-10, 4.79e-10], maxlen=5)
    pressure_value=2.36e-09, local_mean=4.765999999999495e-10
spike_time='2024-06-23 06:35:05' : pressure_deque=deque([6.28e-10, 6.13e-10, 6.07e-10, 5.97e-10, 5.82e-1