# Azure Monitor Log Analytics Workspace Summary

Get a bird's-eye view of the utilization and cost of your Log Analytics workspaces.

## Parameters

**resource_filter**: Optional KQL where clause to limit Azure Monitor workspace resources in scope.

In [None]:
# Optional KQL predicate spliced into the workspace selection query's
# `| where` clause; None (or an empty string) selects every workspace.
resource_filter = None

## Setup

In [None]:
from azmeta.access import resource_graph, monitor_logs, list_subscription_ids
from azmeta.access.billing import full_day_timespan, create_basic_filter, create_cost_query, GroupByColumn, GranularityType, query_cost_dataframe
from azmeta.access.kusto import serialize_to_kql
import azmeta.notebook.interactive as azmi
import pandas as pd
import itertools
from datetime import datetime, timedelta

# Log Analytics Workspace Selection

Retrieve all the workspaces selected for analysis using Azure Resource Graph.

In [None]:
# Resolve the subscriptions that are in scope for this notebook session.
context = azmi.resource_context()
all_subscription_ids = list_subscription_ids(context.subscriptions)

# Resource Graph query listing the Log Analytics workspaces. When no
# resource_filter is supplied the extra where clause is an always-true predicate.
workspace_query = f"""
Resources 
| where type == 'microsoft.operationalinsights/workspaces'
| where {resource_filter or "1 == 1"}
| join kind=leftouter (ResourceContainers | where type == 'microsoft.resources/subscriptions' | project subscriptionName=name, subscriptionId) on subscriptionId
| project subscriptionName, resourceGroup, name, sku = properties.sku.name, reservedGB = properties.sku.capacityReservationLevel, storeDays = properties.retentionInDays, id = properties.customerId, resourceId = tolower(id)
| order by subscriptionName asc
"""

# Index the result by `id` (projected from properties.customerId); the
# lower-cased ARM resource id stays available as the resourceId column.
workspaces = resource_graph.query_dataframe(all_subscription_ids, workspace_query).set_index('id')

In [None]:
# Display the workspace table without the resourceId column.
# Styler.hide_columns() was deprecated in pandas 1.4 and removed in 2.0 in
# favour of Styler.hide(axis='columns'); use whichever this pandas provides.
(
    workspaces.style.hide(['resourceId'], axis='columns')
    if hasattr(workspaces.style, 'hide')
    else workspaces.style.hide_columns('resourceId')
)

# Workspace Utilization

Retrieve the workspace utilization metrics from Log Analytics metadata tables.

In [None]:
# Every reporting window is anchored on the moment the notebook runs.
today = datetime.today()
yesterday, thirtyday = (today - timedelta(days=offset) for offset in (1, 30))

# Window covering yesterday only.
yesterday_begin, yesterday_end = full_day_timespan(yesterday, end_midnight=True)

# Window running from thirty days ago through yesterday.
thirty_begin, thirty_end = full_day_timespan(thirtyday, yesterday, end_midnight=True)

In [None]:
def la_query(query):
    """Run a KQL query against every selected workspace.

    The query is sent to all workspace ids held in the index of the
    ``workspaces`` frame, and the primary result is returned indexed by
    its ``id`` column.
    """
    workspace_ids = workspaces.index.to_list()
    response = monitor_logs.query_dataframe(query, workspace_ids)
    return response.primary_result.set_index('id')

# Billable ingestion per workspace over yesterday's window, reported as
# lastFullDayGB (sum of Quantity divided by 1000).
# NOTE(review): this window is closed on the right (<=) while the thirty-day
# volume query uses a strict <; confirm which matches full_day_timespan.
lfd_volume_query = f"""
Usage
| where TimeGenerated > {serialize_to_kql(yesterday_begin)} and TimeGenerated <= {serialize_to_kql(yesterday_end)}  
| where IsBillable == true
| summarize lastFullDayGB = sum(Quantity) / 1000 by TenantId
| project-rename id = TenantId
"""
df_lfd_volume = la_query(lfd_volume_query)

In [None]:
# Billable ingestion per workspace over the thirty-day window: the query bins
# the volume per day, then reports the median and minimum daily volume.
thirty_day_volume_query = f"""
Usage
| where TimeGenerated > {serialize_to_kql(thirty_begin)} and TimeGenerated < {serialize_to_kql(thirty_end)}  
| where IsBillable == true
| summarize fullDayGB = sum(Quantity) / 1000 by TenantId, bin(TimeGenerated, 1d)
| summarize medianDayGB = percentile(fullDayGB, 50), minDayGB = min(fullDayGB) by TenantId 
| project-rename id = TenantId
"""
df_30d_volume = la_query(thirty_day_volume_query)

In [None]:
# Number of distinct SourceComputerId values that sent a Heartbeat record to
# each workspace during yesterday's window.
lfd_nodes_query = f"""
Heartbeat
| where TimeGenerated > {serialize_to_kql(yesterday_begin)} and TimeGenerated <= {serialize_to_kql(yesterday_end)}  
| summarize by SourceComputerId, TenantId
| summarize nodesReporting = count() by TenantId
| project-rename id = TenantId 
"""
df_lfd_nodes = la_query(lfd_nodes_query)

# Workspace Cost

Retrieve the workspace cost information from Azure Cost Management.

In [None]:
# Lower-cased ARM resource ids of the selected workspaces, used to scope the cost query.
workspace_resource_ids = workspaces['resourceId'].tolist()

In [None]:
# Limit the cost query to the selected workspace resources.
query_filter = create_basic_filter(resource_ids=workspace_resource_ids)

# Daily cost grouped by ResourceId, from thirty days ago through yesterday.
cost_timespan = full_day_timespan(thirtyday, yesterday)
query = create_cost_query(
    cost_timespan,
    grouping=GroupByColumn("ResourceId"),
    filter=query_filter,
    granularity=GranularityType.daily,
)

In [None]:
# Run the cost query against the context's default billing account.
billing_account = context.default_billing_account
cost_df = query_cost_dataframe(billing_account, query)

In [None]:
# Aggregate the daily cost rows into one row per workspace resource.
#
# Select the Cost column *before* aggregating: aggregating the whole frame
# makes pandas apply sum/median to every other column as well, which raises
# TypeError on non-numeric columns in pandas 2.0+ (earlier versions silently
# dropped them). The resulting Series are the same as before.
cost_by_resource = cost_df.groupby('ResourceId')['Cost']
total_cost = cost_by_resource.sum()
median_cost = cost_by_resource.median()

# Cost rows for the most recent UsageDate present in the result.
latest_usage_date = cost_df.UsageDate.max()
lfd_cost = cost_df[cost_df.UsageDate == latest_usage_date].set_index('ResourceId').Cost

cost_agg_df = pd.DataFrame({
    'thirty_day_cost': total_cost,
    'thirty_day_median_cost': median_cost,
    'last_full_day_cost': lfd_cost,
})

# Dashboard

Top cost workspaces.

In [None]:
# Attach the Log Analytics metrics (matched on the workspace index) and then
# the cost aggregates (matched on the resourceId column).
usage_frames = [df_lfd_volume, df_30d_volume, df_lfd_nodes]
full = workspaces.join(usage_frames).join(cost_agg_df, on='resourceId')

# Last-full-day cost divided by the number of nodes that reported.
full = full.assign(full_day_avg_cost=full.last_full_day_cost / full.nodesReporting)


def _dashboard_sort_key(column):
    """Sort medianDayGB by missing/not-missing only; leave other columns as-is."""
    return pd.isna(column) if column.name == 'medianDayGB' else column


# Rows with a medianDayGB value come first; within each group the highest
# thirty-day cost is listed first.
full = full.sort_values(
    ['medianDayGB', 'thirty_day_cost'],
    ascending=[True, False],
    key=_dashboard_sort_key,
)

In [None]:
def build_header_style(col_groups):
    """Build Styler table styles that colour column headers by group.

    ``col_groups`` is a sequence of ``(group, label)`` column keys. Each run
    of consecutive columns sharing the same group gets one style for the
    level-0 heading at the run's first column position, followed by one style
    for the level-1 heading of every column in the run.

    Returns a list of ``{'selector': ..., 'props': ...}`` dictionaries.
    Raises ``KeyError`` for a group that has no colour assigned.
    """
    group_colors = {'Config': '#f6f6f6', 'Thirty Day': '#eae9e9', 'Last Full Day': '#d4d7dd'}
    header_styles = []
    offset = 0
    for label, members in itertools.groupby(col_groups, key=lambda col: col[0]):
        color = group_colors[label]
        header_styles.append({
            'selector': f'.col_heading.level0.col{offset}',
            'props': [('background-color', color)],
        })
        width = sum(1 for _ in members)
        header_styles.extend(
            {
                'selector': f'.col_heading.level1.col{position}',
                'props': [('background-color', color)],
            }
            for position in range(offset, offset + width)
        )
        offset += width
    return header_styles

# Presentation copy of the dashboard frame without the resourceId column
# (drop() already returns a new frame, so `full` is left untouched).
fulls = full.drop(columns='resourceId')

# Replace the raw column names with (group, label) headers, in column order.
config_labels = ['Subscription Name', 'Resource Group', 'Name', 'SKU', 'Reserved GB', 'Retention (days)']
fulls.columns = pd.MultiIndex.from_tuples([
    *itertools.product(['Config'], config_labels),
    ('Last Full Day', 'Total GB'),
    ('Thirty Day', 'Median GB'),
    ('Thirty Day', 'Min GB'),
    ('Last Full Day', 'Nodes Reporting'),
    ('Thirty Day', 'Total Cost'),
    ('Thirty Day', 'Median Cost'),
    ('Last Full Day', 'Total Cost'),
    ('Last Full Day', 'Avg Cost Per Node'),
])

# Cost columns are rendered as currency; GB columns outside the Config group
# (i.e. not 'Reserved GB') are rendered with one decimal.
cost_columns = [col for col in fulls.columns if 'Cost' in col[1]]
volume_columns = [col for col in fulls.columns if 'GB' in col[1] and col[0] != 'Config']

styler = fulls.style
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0 in
# favour of Styler.hide(axis='index'); use whichever this pandas provides.
styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
styler = (
    styler
    .format('${:,.2f}', na_rep='N/A', subset=cost_columns)
    .format('{:,.1f}', na_rep='N/A', subset=volume_columns)
    .set_table_styles(build_header_style(fulls.columns))
)

# Apply a separate gradient per column so each metric is shaded on its own scale.
for column in [col for col in fulls.columns if 'Cost' in col[1] or ('GB' in col[1] and col[0] != 'Config')]:
    styler.background_gradient(subset=[column])
styler