In [31]:
import numpy as np
import pandas as pd
from prometheus_api_client.utils import parse_datetime
from prometheus_api_client import PrometheusConnect
from utils import query_to_df
from prophet.plot import plot_plotly, plot_components_plotly
from train import fit_predict
from feature_engine.outliers import Winsorizer

[prometheus_api_client](https://github.com/AICoE/prometheus-api-client-python) module is a Python wrapper for the Prometheus http api and some tools for metrics processing.

First, we need to connect to the Prometheus server:

In [12]:

# Address of the Prometheus server this notebook talks to.
PROMETHEUS_URL = "http://localhost:9090"

# disable_ssl=True: skip TLS certificate verification for requests to the server.
prom = PrometheusConnect(url=PROMETHEUS_URL, disable_ssl=True)

# List the names of all the metrics that this Prometheus host scrapes
prom.all_metrics()

['go_gc_duration_seconds',
 'go_gc_duration_seconds_count',
 'go_gc_duration_seconds_sum',
 'go_goroutines',
 'go_info',
 'go_memstats_alloc_bytes',
 'go_memstats_alloc_bytes_total',
 'go_memstats_buck_hash_sys_bytes',
 'go_memstats_frees_total',
 'go_memstats_gc_cpu_fraction',
 'go_memstats_gc_sys_bytes',
 'go_memstats_heap_alloc_bytes',
 'go_memstats_heap_idle_bytes',
 'go_memstats_heap_inuse_bytes',
 'go_memstats_heap_objects',
 'go_memstats_heap_released_bytes',
 'go_memstats_heap_sys_bytes',
 'go_memstats_last_gc_time_seconds',
 'go_memstats_lookups_total',
 'go_memstats_mallocs_total',
 'go_memstats_mcache_inuse_bytes',
 'go_memstats_mcache_sys_bytes',
 'go_memstats_mspan_inuse_bytes',
 'go_memstats_mspan_sys_bytes',
 'go_memstats_next_gc_bytes',
 'go_memstats_other_sys_bytes',
 'go_memstats_stack_inuse_bytes',
 'go_memstats_stack_sys_bytes',
 'go_memstats_sys_bytes',
 'go_threads',
 'net_conntrack_dialer_conn_attempted_total',
 'net_conntrack_dialer_conn_closed_total',
 'net_con

To load metric data into a pandas DataFrame, we can use the `MetricRangeDataFrame` class:

In [13]:
from prometheus_api_client import MetricSnapshotDataFrame, MetricRangeDataFrame
import datetime as dt

# Fetch the raw samples of the `up` metric for the most recent 10 minutes.
lookback = dt.timedelta(minutes=10)
window_start = dt.datetime.now() - lookback
window_end = dt.datetime.now()

metric_data = prom.get_metric_range_data(metric_name='up', start_time=window_start, end_time=window_end)

# Wrap the raw result in a DataFrame and preview the first rows.
metric_df = MetricRangeDataFrame(metric_data)
metric_df.head()

Unnamed: 0_level_0,__name__,instance,job,value
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1638136000.0,up,localhost:9090,prometheus,1
1638136000.0,up,localhost:9090,prometheus,1
1638136000.0,up,localhost:9090,prometheus,1
1638136000.0,up,localhost:9090,prometheus,1
1638136000.0,up,localhost:9090,prometheus,1


Next, convert the timestamp index from Unix epoch seconds to datetime:

In [14]:
# The index currently holds Unix epoch seconds; reinterpret it as pandas datetimes.
epoch_seconds = metric_df.index
metric_df.index = pd.to_datetime(epoch_seconds, unit='s')
metric_df.head()

Unnamed: 0_level_0,__name__,instance,job,value
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-11-28 21:48:23.032000065,up,localhost:9090,prometheus,1
2021-11-28 21:48:28.026999950,up,localhost:9090,prometheus,1
2021-11-28 21:48:33.025000095,up,localhost:9090,prometheus,1
2021-11-28 21:48:38.029999971,up,localhost:9090,prometheus,1
2021-11-28 21:48:43.030999899,up,localhost:9090,prometheus,1


In the more general case, however, we want to work with the result of a query rather than a raw metric. For this we use `custom_query_range()`:

In [15]:
# Evaluate a PromQL expression from parse_datetime("30d") up to now, one point every 5 minutes.
# NOTE(review): go_memstats_gc_cpu_fraction looks like a gauge, while Prometheus documents
# rate() as intended for counters — confirm this expression is what we want to forecast.
start_time = parse_datetime("30d")
end_time = parse_datetime("now")

gc_query = "rate(go_memstats_gc_cpu_fraction[10m])"
query_range = prom.custom_query_range(
    gc_query,
    start_time=start_time,
    end_time=end_time,
    step='5m',
)

We can see that `MetricRangeDataFrame` stores the timestamps in the index and adds `instance` and `job` label columns next to the `value` column:

In [16]:
# Wrap the raw range-query result in MetricRangeDataFrame to inspect the layout it produces
MetricRangeDataFrame(query_range)

Unnamed: 0_level_0,instance,job,value
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1636839200,localhost:9090,prometheus,0.0000024046837699224218
1636839500,localhost:9090,prometheus,0.0000025355528041467042
1636839800,localhost:9090,prometheus,3.274798821084284e-07
1636840100,localhost:9090,prometheus,2.5703413736382065e-07
1636840400,localhost:9090,prometheus,1.6321924369481857e-07
...,...,...,...
1638135500,localhost:9090,prometheus,6.697623212413512e-09
1638135800,localhost:9090,prometheus,6.69974493549595e-09
1638136100,localhost:9090,prometheus,6.711859308890429e-09
1638136400,localhost:9090,prometheus,1.3355198313173104e-08


if we look at the query_range structure...

In [17]:
# Echo the raw object returned by custom_query_range() to inspect its structure
query_range

[{'metric': {'instance': 'localhost:9090', 'job': 'prometheus'},
  'values': [[1636839200, '0.0000024046837699224218'],
   [1636839500, '0.0000025355528041467042'],
   [1636839800, '3.274798821084284e-07'],
   [1636840100, '2.5703413736382065e-07'],
   [1636840400, '1.6321924369481857e-07'],
   [1636840700, '1.1293591011648919e-07'],
   [1636841000, '1.1068195372564558e-07'],
   [1636841300, '1.4176440049274185e-07'],
   [1636841600, '1.344690456752908e-07'],
   [1636841900, '6.73697136398778e-08'],
   [1636842200, '2.1382252769906293e-09'],
   [1636852700, '1.239797931401309e-08'],
   [1636853000, '1.9883729116705133e-08'],
   [1636853300, '9.867170434296461e-09'],
   [1636862000, '1.75213364363055e-08'],
   [1636862300, '2.5492716184189747e-08'],
   [1636862600, '8.83785869590365e-09'],
   [1636862900, '1.7947889085587307e-08'],
   [1636863200, '2.6661961750923694e-08'],
   [1636863500, '1.815180270552482e-08'],
   [1636863800, '3.61451749514204e-08'],
   [1636864100, '3.602522469619

We can build the DataFrame directly from it using only the pandas `DataFrame` constructor, which gives the two-column (`ds`, `y`) frame that Prophet (fbprophet) needs:

In [18]:
# Build the two-column frame Prophet expects: `ds` (timestamp) and `y` (value).
# Only the first series of the query result is used.
df = pd.DataFrame(query_range[0]['values'], columns=['ds', 'y'])

# Epoch seconds -> UTC-aware datetimes -> Asia/Jerusalem wall-clock time -> tz-naive.
# The conversion is spelled out explicitly because casting a tz-naive column with
# .astype('datetime64[ns, <tz>]') is deprecated and raises TypeError in pandas >= 2.0.
df['ds'] = (
    pd.to_datetime(df['ds'], unit='s', utc=True)
    .dt.tz_convert('Asia/Jerusalem')
    .dt.tz_localize(None)
)

# Prometheus returns sample values as strings; convert them to floats.
df['y'] = df['y'].astype(float)
df

Unnamed: 0,ds,y
0,2021-11-13 23:33:20,2.404684e-06
1,2021-11-13 23:38:20,2.535553e-06
2,2021-11-13 23:43:20,3.274799e-07
3,2021-11-13 23:48:20,2.570341e-07
4,2021-11-13 23:53:20,1.632192e-07
...,...,...
616,2021-11-28 23:38:20,6.697623e-09
617,2021-11-28 23:43:20,6.699745e-09
618,2021-11-28 23:48:20,6.711859e-09
619,2021-11-28 23:53:20,1.335520e-08


test reading the queries from txt files:

In [19]:
# Load the query file and split it into one query string per line.
with open('test_queries.txt') as queries_file:
    raw_queries = queries_file.read()
queries = raw_queries.splitlines()

In [20]:
# Show the queries read from test_queries.txt (one list entry per line of the file)
queries

['go_memstats_alloc_bytes', 'rate(go_memstats_alloc_bytes_total[1m])']

test utils.query_to_df():

In [21]:
# Fetch a fixed one-day window of go_memstats_alloc_bytes at 15-second steps
# through the project helper utils.query_to_df().
alloc_query = 'go_memstats_alloc_bytes'
start_time, end_time = "2021-11-20 12:00:00", "2021-11-21 12:00:00"
step = "15s"

df = query_to_df(
    prom,
    alloc_query,
    start_time,
    end_time,
    step,
)


The localize method is no longer necessary, as this time zone supports the fold attribute (PEP 495). For more details on migrating to a PEP 495-compliant implementation, see https://pytz-deprecation-shim.readthedocs.io/en/latest/migration.html



test fit_predict with non-default hourly seasonality:

In [22]:
# Custom seasonality spec handed to train.fit_predict().
# NOTE(review): presumably {name: [period, fourier_order]} — confirm against train.py.
season = {'hourly': [0.166667, 10]}

# Fit on `df` and forecast 172 further steps at a 30-second frequency.
m, forecast = fit_predict(
    df,
    periods=172,
    freq='30s',
    season=season,
)

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [23]:
# Plot the fitted model `m` together with its `forecast` using Prophet's Plotly helper
plot_plotly(m, forecast)

check that datetime column can be compared with full text timestamp that includes time:

In [24]:
# Element-wise comparison of the `ds` column against a full timestamp string; yields a boolean Series
forecast['ds'] >= "2021-11-21 12:15:00"

0       False
1       False
2       False
3       False
4       False
        ...  
5928     True
5929     True
5930     True
5931     True
5932     True
Name: ds, Length: 5933, dtype: bool

check the influence of winsorizing on the forecasting:

In [32]:
# Cap outliers in `y` (IQR rule, fold=1.5, both tails) and refit, to compare the
# resulting forecast with the un-winsorized one.
wins = Winsorizer(capping_method='iqr', tail='both', fold=1.5)

# Write the capped values into a new frame instead of overwriting df['y']:
# mutating `df` in place would lose the raw series, so re-running the baseline
# fit_predict cell afterwards would silently fit on capped data.
df_winsorized = df.assign(y=wins.fit_transform(df[['y']])['y'])

m, forecast = fit_predict(df_winsorized, periods=172, freq='30s', season=season)
plot_plotly(m, forecast)

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


We can see that winsorizing causes the uncertainty intervals to become significantly narrower. However, it is not clear that this is desirable, again because we are focusing on precision: specifically, we are not sure that we want to alert on the points at around 7:45 on 21/11/2021. We want to detect only significant anomalies, like the points that were winsorized; but those points will be detected anyway, even without winsorizing. So again we come to the conclusion that winsorizing is probably not needed for our purpose.