# viz-4b-anomaly-detection-annotation.ipynb

This notebook adds user-created annotations as an additional layer to the visualization. It is a good example of how Vega can fetch data from several sources/indices within a single chart, something that is not easily done with Kibana's built-in visualizations.


In [1]:
import datetime
import altair as alt
import eland as ed
import json
import numpy as np
import matplotlib.pyplot as plt
# Turn off Altair's row-count limit check for the active data transformer.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Elasticsearch index holding the user-created ML annotations.
annotation_index_name = '.ml-annotations-read'
# Elasticsearch index holding the anomaly-detection results that get plotted.
index_name = '.ml-anomalies-fq_single_count_15m'
# NOTE(review): `vis_name` is not referenced by any of the visible cells — confirm whether it is still needed.
vis_name = 'ml-anomalies-fq'

In [3]:
# Open the anomaly results index as an eland DataFrame (Elasticsearch at localhost:9200).
ed_df = ed.DataFrame('localhost:9200', index_name)
# Keep only the numeric columns and display the resulting (rows, columns) shape.
ed_df_number = ed_df.select_dtypes(include=np.number)
ed_df_number.shape

(976, 66)

In [4]:
# Vega fetches the annotations itself from the Elasticsearch _search endpoint;
# the individual documents sit under `hits.hits` in the response.
url_annotation = f'http://localhost:9200/{annotation_index_name}/_search?size=10000'
url_data_annotation = alt.Data(
    name='ml-annotations',
    url=url_annotation,
    format=alt.DataFormat(type='json', property='hits.hits'),
)

# Attributes copied out of each hit's `_source` into top-level calculated fields.
fields_annotation = ['annotation', 'timestamp', 'end_timestamp', 'job_id']

rename_dict_annotation = {
    field: f'datum._source.{field}' for field in fields_annotation
}

base_annotation = alt.Chart(url_data_annotation).transform_calculate(
    **rename_dict_annotation
)

# Only the annotations that belong to the job being plotted.
job_annotations = base_annotation.transform_filter(
    alt.datum.job_id == "fq_single_count_15m"
)

# Constant y/y2 fields give every rectangle the same fixed vertical extent (0 to 350).
annotation_band = job_annotations.transform_calculate(y="350", y2="0")

# One translucent rectangle per annotation, spanning timestamp -> end_timestamp.
annotations = annotation_band.mark_rect(color="#369", opacity=0.15).encode(
    x=alt.X('timestamp:T'),
    x2=alt.X2('end_timestamp:T'),
    y=alt.Y('y:Q'),
    y2=alt.Y2('y2:Q'),
    tooltip=['annotation:N'],
)

annotations

In [5]:
# Vega fetches the anomaly results itself from the Elasticsearch _search endpoint;
# the individual documents sit under `hits.hits` in the response.
url = f'http://localhost:9200/{index_name}/_search?size=10000'
url_data = alt.Data(
    url=url,
    format=alt.DataFormat(property='hits.hits', type='json'),
    name='ml-anomalies'
)

# Attributes copied out of each hit's `_source` into top-level calculated fields.
# `fields` / `rename_dict` are reused by the layered chart further down, so they
# keep the full list (including `timestamp` and `result_type`).
fields = [
    'anomaly_score',
    'actual',
    'typical',
    'event_count',
    'model_lower',
    'model_median',
    'model_upper',
    'timestamp',
    'result_type'
]

rename_dict = {field: 'datum._source.' + field for field in fields}

base = alt.Chart(url_data).transform_calculate(**rename_dict)

# `result_type` is a string category (it is compared against 'model_plot' when
# the layers are built), so binning it as a quantitative value cannot produce a
# meaningful histogram. Only the remaining fields get a distribution panel.
histogram_fields = [field for field in fields if field != 'result_type']

# Fold the selected fields into (attribute, value) pairs so that one histogram
# definition can be reused for every attribute.
url_chart = base.transform_fold(
    histogram_fields,
    as_=['attribute', 'value']
).mark_bar().encode(
    alt.X('value:Q', bin=True, title=''),
    alt.Y('count()', title=''),
    tooltip=[
        alt.Tooltip('value:Q', bin=True, title='x'),
        alt.Tooltip('count()', title='y')
    ]
).properties(
    width=100,
    height=100
)

# One small histogram per attribute, four per row, each with its own axes and
# scales because the attributes have unrelated value ranges.
url_charts = alt.ConcatChart(
    concat=[
      url_chart.transform_filter(alt.datum.attribute == attribute).properties(title=attribute)
      for attribute in sorted(histogram_fields)
    ],
    columns=4
).resolve_axis(
    x='independent',
    y='independent'
).resolve_scale(
    x='independent',
    y='independent'
)

url_charts

In [6]:
# Alternative palettes (actual, model bounds, model median):
#   custom: ['#1f77b4', 'lightgray', 'gray']
#   ml:     ['#32a7c2', '#c8e6ef', '#bbd7df']
colors = dict(model_bounds='lightgray', model_median='gray', actual='#1f77b4')


def time_axis():
    """Return a fresh x encoding for the shared, untitled, grid-less time axis."""
    return alt.X(
        'timestamp:T',
        scale=alt.Scale(zero=False, nice=False),
        title='',
        axis=alt.Axis(grid=False),
    )


base = alt.Chart(url_data, width=800, height=300).transform_calculate(**rename_dict)

# Rows of result type 'model_plot' feed the bounds, median and actual layers.
model_plot_rows = base.transform_filter(alt.datum.result_type == 'model_plot')

# Per-timestamp means feed the anomaly markers and the invisible tooltip layer.
per_timestamp = base.transform_aggregate(
    mean_actual='mean(actual)',
    mean_anomaly_score='mean(anomaly_score)',
    groupby=["timestamp"],
)

# Shaded band between the mean lower and mean upper model bound.
model_bounds = model_plot_rows.mark_area(
    color=colors['model_bounds'],
    opacity=0.5,
).encode(
    time_axis(),
    alt.Y('mean(model_upper):Q'),
    alt.Y2('mean(model_lower):Q'),
)

# Mean model median as a faint line.
model_median = model_plot_rows.mark_line(
    color=colors['model_median'],
    opacity=0.5,
    strokeJoin='round',
).encode(
    time_axis(),
    alt.Y('mean(model_median):Q', title=''),
)

# Mean actual value as the main line.
actual = model_plot_rows.mark_line(
    color=colors['actual'],
    strokeJoin='round',
).encode(
    time_axis(),
    alt.Y('mean(actual):Q'),
)

# Red points at timestamps whose mean anomaly score is above zero.
anomalies = per_timestamp.transform_filter(
    alt.datum.mean_anomaly_score > 0
).mark_point(color='red', size=60).encode(
    time_axis(),
    alt.Y('mean_actual:Q'),
)

# Fully transparent circles that exist only to carry the hover tooltip.
tooltip = per_timestamp.mark_circle(opacity=0, size=100).encode(
    time_axis(),
    alt.Y('mean_actual:Q'),
    tooltip=['mean_actual:Q', 'mean_anomaly_score:Q', 'timestamp:T'],
)

# Annotation rectangles are layered first, with the grouped anomaly layers after them.
anomaly_layers = model_bounds + model_median + actual + anomalies + tooltip

chart = (annotations + anomaly_layers).configure_axis(
    grid=True,
    gridColor="#eee",
    domainColor="#ddd",
    tickColor="#ddd",
    labelColor="gray",
    labelBound=True,
).configure_view(
    strokeWidth=0,
).configure_title(
    fontSize=14,
    fontWeight='bold',
    anchor='start',
    color='gray',
)

chart

In [7]:
# Round-trip the chart through its JSON form to display the generated spec as a plain dict.
json.loads(chart.to_json())

{'$schema': 'https://vega.github.io/schema/vega-lite/v4.8.1.json',
 'config': {'axis': {'domainColor': '#ddd',
   'grid': True,
   'gridColor': '#eee',
   'labelBound': True,
   'labelColor': 'gray',
   'tickColor': '#ddd'},
  'title': {'anchor': 'start',
   'color': 'gray',
   'fontSize': 14,
   'fontWeight': 'bold'},
  'view': {'continuousHeight': 300, 'continuousWidth': 400, 'strokeWidth': 0}},
 'layer': [{'data': {'format': {'property': 'hits.hits', 'type': 'json'},
    'name': 'ml-annotations',
    'url': 'http://localhost:9200/.ml-annotations-read/_search?size=10000'},
   'encoding': {'tooltip': [{'field': 'annotation', 'type': 'nominal'}],
    'x': {'field': 'timestamp', 'type': 'temporal'},
    'x2': {'field': 'end_timestamp'},
    'y': {'field': 'y', 'type': 'quantitative'},
    'y2': {'field': 'y2'}},
   'mark': {'color': '#369', 'opacity': 0.15, 'type': 'rect'},
   'transform': [{'as': 'annotation', 'calculate': 'datum._source.annotation'},
    {'as': 'timestamp', 'calculate

In [8]:
from elasticsearch import Elasticsearch

client = Elasticsearch([{'host': 'localhost', 'port': 9200}])
resultSize = 10000
visName = 'ml-anomalies-fq-annotations'


def kibana_index_query(index, time_field="timestamp", size=resultSize):
    """Build the Kibana-Vega ``data.url`` object that queries ``index``.

    Inside Kibana the spec must not call Elasticsearch by URL; instead
    ``url`` is an object describing the search. ``%context%`` asks Kibana to
    apply the dashboard's query/filter context and ``%timefield%`` names the
    field the time picker is applied to (see the Kibana Vega reference).

    Args:
        index: Name of the Elasticsearch index to search.
        time_field: Document field used for the time-range filter.
        size: Maximum number of hits requested in the search body.

    Returns:
        A new dict suitable for assignment to a layer's ``data['url']``.
    """
    return {
        "%context%": True,
        "%timefield%": time_field,
        "index": index,
        "body": {
            "size": size
        }
    }


# Replace the direct localhost URLs with Kibana-side index queries.
# Layer 0 is the annotation layer and layer 1 the grouped anomaly layers,
# matching the order in which `chart` was composed.
chart_json = json.loads(chart.to_json())
chart_json['layer'][0]['data']['url'] = kibana_index_query(annotation_index_name)
chart_json['layer'][1]['data']['url'] = kibana_index_query(index_name)

# Kibana stores the Vega spec as a JSON string inside the (also stringified) visState.
visState = {
  "type": "vega",
  "aggs": [],
  "params": {
    "spec": json.dumps(chart_json, sort_keys=True, indent=4, separators=(',', ': ')),
  },
  "title": visName
}

visSavedObject={
    "visualization" : {
      "title" : visName,
      "visState" : json.dumps(visState, sort_keys=True, indent=4, separators=(',', ': ')),
      "uiStateJSON" : "{}",
      "description" : "",
      "version" : 1,
      "kibanaSavedObjectMeta" : {
        "searchSourceJSON" : json.dumps({
          "query": {
            "language": "kuery",
            "query": ""
          },
          "filter": []
        }),
      }
    },
    "type" : "visualization",
    "references" : [ ],
    "migrationVersion" : {
      "visualization" : "7.7.0"
    },
    # The trailing "Z" declares UTC, so the clock must be read in UTC as well;
    # a naive local `now()` would be off by the machine's UTC offset.
    "updated_at" : datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
}

# Write (or overwrite) the saved object directly in the Kibana index.
client.index(index='.kibana',id='visualization:'+visName,body=visSavedObject)

{'_index': '.kibana_3',
 '_id': 'visualization:ml-anomalies-fq-annotations',
 '_version': 3,
 'result': 'updated',
 '_shards': {'total': 1, 'successful': 1, 'failed': 0},
 '_seq_no': 445,
 '_primary_term': 2}