In [34]:
import sched
import time
import datetime as dt
from prometheus_api_client.utils import parse_datetime
import pandas as pd
from detect import get_forecast_slice, is_anomaly
import numpy as np

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


One option is to schedule the detector inside the process, so that the process runs "forever" and executes a fresh detection every minute. Here are some experiments with the `sched` module to that end:

In [4]:
# Wall-clock scheduler: event times are absolute epoch seconds and waiting is done with time.sleep.
s = sched.scheduler(timefunc=time.time, delayfunc=time.sleep)


def print_time(a='default'):
    """Print a marker line with the current epoch time followed by the label ``a``."""
    now = time.time()
    print("From print_time", now, a)

In [15]:
# Convert a naive (local-time) datetime into epoch seconds, the unit the scheduler works in.
time.mktime(
    dt.datetime(year=2021, month=11, day=26, hour=12, minute=40, second=5).timetuple()
)

1637923205.0

In [16]:
# Queue five print_time calls one minute apart, the first at 13:01 local time.
# NOTE(review): the date is hard-coded; on a later re-run all five events are
# already in the past, so the spacing shown in the recorded output will not reproduce.
start = time.mktime(dt.datetime(2021, 11, 26, 13, 1).timetuple())
for _ in range(5):
    s.enterabs(start, priority=1, action=print_time)
    start += 60
# run() blocks until the queue is empty.
s.run()


From print_time 1637924460.002396 default
From print_time 1637924520.0050502 default
From print_time 1637924580.0155327 default
From print_time 1637924640.0046804 default
From print_time 1637924700.0149934 default


We can see that this is possible. However, running the process forever does not seem to be a good pattern. A better one is to use an external scheduler such as crontab and run the process every minute. This way each process runs independently and only for a short time, so we get more stability.

Note that we parse dates immediately at the start of the script to extract the right time range from the forecast file. After that, however, we need to read from Prometheus with query_to_df(), which expects to get strings. It seems that parse_datetime(), the function query_to_df() uses internally, knows how to deal with datetime objects, so this is a non-issue:

In [18]:
# Feed parse_datetime an already-parsed pandas timestamp instead of a string.
ts = pd.to_datetime("2021-11-23")
parse_datetime(ts)

  date_obj = stz.localize(date_obj)


datetime.datetime(2021, 11, 23, 0, 0)

test get_forecast_slice:

In [47]:
# Request a 2 min 15 s window at a 15 s step; the recorded output shows ten rows.
forecast = "prometheus_tsdb_head_chunks"
start_time, end_time = "2021-11-21 12:30:00", "2021-11-21 12:32:15"
step = '15s'

pred = get_forecast_slice(
    forecast,
    start_time=start_time,
    end_time=end_time,
    step=step,
)
pred

  date_obj = stz.localize(date_obj)


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2021-11-21 12:30:00,9144.699952,7334.268057,10952.135716
1,2021-11-21 12:30:15,9143.165769,7618.407557,10670.317276
2,2021-11-21 12:30:30,9143.165769,7618.407557,10670.317276
3,2021-11-21 12:30:45,9137.557913,7491.144256,10904.603329
4,2021-11-21 12:31:00,9137.557913,7491.144256,10904.603329
5,2021-11-21 12:31:15,9128.453239,7454.625331,10800.2292
6,2021-11-21 12:31:30,9128.453239,7454.625331,10800.2292
7,2021-11-21 12:31:45,9116.514968,7433.927557,10726.386351
8,2021-11-21 12:32:00,9116.514968,7433.927557,10726.386351
9,2021-11-21 12:32:15,9102.482171,7313.61982,10515.466608


test is_anomaly:

In [46]:
# Envelope of the forecast slice: lowest lower bound, mean prediction, highest upper bound.
pred_min = pred["yhat_lower"].min()
pred_mean = pred["yhat"].mean()
pred_max = pred["yhat_upper"].max()


def _observations(*runs):
    """Build a single-column ('y') frame from (value, repeat_count) pairs, in order."""
    values = [value for value, count in runs for _ in range(count)]
    return pd.DataFrame(np.array(values), columns=['y'])


# Ten-point series. The "no_anomaly" ones have at most four out-of-band points,
# while `upper` / `lower` have five points beyond the respective bound.
no_anomaly1 = _observations((pred_mean, 10))
no_anomaly2 = _observations((pred_mean, 6), (pred_min - 1, 4))
no_anomaly3 = _observations((pred_mean, 6), (pred_max + 1, 4))

upper = _observations((pred_mean, 5), (pred_max + 1, 5))
lower = _observations((pred_mean, 5), (pred_min - 1, 5))

# (printed label, observed frame, extra keyword arguments for is_anomaly).
# An empty dict means the call relies on is_anomaly's default anomaly_type.
cases = [
    ("no_anomaly 1, check-upper:", no_anomaly1, {}),
    ("no_anomaly 1, check-lower:", no_anomaly1, {"anomaly_type": "lower"}),
    ("no_anomaly 1, check-both:", no_anomaly1, {"anomaly_type": "both"}),
    ("no_anomaly 2:", no_anomaly2, {}),
    ("no_anomaly 2, check-lower:", no_anomaly2, {"anomaly_type": "lower"}),
    ("no_anomaly 2, check-both:", no_anomaly2, {"anomaly_type": "both"}),
    ("no_anomaly 3:", no_anomaly3, {}),
    ("no_anomaly 3, check-lower:", no_anomaly3, {"anomaly_type": "lower"}),
    ("no_anomaly 3, check-both:", no_anomaly3, {"anomaly_type": "both"}),
    ("actual-upper, check-upper:", upper, {}),
    ("actual-upper, check-lower:", upper, {"anomaly_type": "lower"}),
    ("actual-upper, check-both:", upper, {"anomaly_type": "both"}),
    ("actual-lower, check-upper:", lower, {}),
    ("actual-lower, check-lower:", lower, {"anomaly_type": "lower"}),
    ("actual-lower, check-both:", lower, {"anomaly_type": "both"}),
]

for label, observed, options in cases:
    print(label, is_anomaly(observed, pred, **options))


no_anomaly 1, check-upper: 0
no_anomaly 1, check-lower: 0
no_anomaly 1, check-both: 0
no_anomaly 2: 0
no_anomaly 2, check-lower: 0
no_anomaly 2, check-both: 0
no_anomaly 3: 0
no_anomaly 3, check-lower: 0
no_anomaly 3, check-both: 0
actual-upper, check-upper: 1
actual-upper, check-lower: 0
actual-upper, check-both: 1
actual-lower, check-upper: 0
actual-lower, check-lower: -1
actual-lower, check-both: -1
