In [None]:
from azmeta.access import resource_graph, monitor_logs, list_subscription_ids
from azmeta.access.billing import full_day_timespan, create_basic_filter, create_cost_query, GroupByColumn, GranularityType, query_cost_dataframe
from azmeta.access.kusto import serialize_to_kql
import azmeta.notebook.interactive as azmi
import pandas as pd
import itertools
from datetime import datetime, timedelta

# Parameters

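**resource_filter**: Optional KQL predicate (the expression that follows `where`, without the `where` keyword itself) used to limit which Log Analytics workspace resources are in scope, e.g. `resourceGroup == 'my-rg'`. Leave as `None` to include every workspace.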

In [None]:
# None (or any other falsy value) applies no extra filtering: the workspace
# query substitutes the always-true predicate "1 == 1" in its place.
resource_filter = None

# Log Analytics Workspace Selection

Shows all the workspaces selected for analysis.

In [None]:
# Resolve the interactive resource context and the subscriptions it covers.
context = azmi.resource_context()
all_subscription_ids = list_subscription_ids(context.subscriptions)

# Fall back to an always-true predicate when no resource filter was supplied.
workspace_predicate = resource_filter or "1 == 1"

# Resource Graph query listing every workspace together with its subscription
# name, SKU details, retention and customer id.
workspace_query = f"""
Resources 
| where type == 'microsoft.operationalinsights/workspaces'
| where {workspace_predicate}
| join kind=leftouter (ResourceContainers | where type == 'microsoft.resources/subscriptions' | project subscriptionName=name, subscriptionId) on subscriptionId
| project subscriptionName, resourceGroup, name, sku = properties.sku.name, reservedGB = properties.sku.capacityReservationLevel, storeDays = properties.retentionInDays, id = properties.customerId, resourceId = tolower(id)
| order by subscriptionName asc
"""

# Index by the workspace customer id (projected as `id` in the query).
workspaces = resource_graph.query_dataframe(all_subscription_ids, workspace_query).set_index('id')

In [None]:
# Show the selected workspaces without the resourceId column.
# Styler.hide_columns was deprecated in pandas 1.4 and removed in 2.0 in
# favour of Styler.hide(axis="columns"); use whichever this pandas provides.
workspace_styler = workspaces.style
if hasattr(workspace_styler, 'hide'):
    workspace_styler = workspace_styler.hide(axis='columns', subset=['resourceId'])
else:
    workspace_styler = workspace_styler.hide_columns('resourceId')
workspace_styler

# Pull Workspace Utilization

In [None]:
# Reference dates, all relative to the moment the notebook is run.
today = datetime.today()
yesterday = today - timedelta(days=1)
thirtyday = today - timedelta(days=30)

# Begin/end bounds for yesterday alone.
yesterday_begin, yesterday_end = full_day_timespan(yesterday, end_midnight=True)

# Begin/end bounds for the span from thirtyday through yesterday.
thirty_begin, thirty_end = full_day_timespan(thirtyday, yesterday, end_midnight=True)

In [None]:
def la_query(query):
    """Run a log query against every selected workspace.

    The query is sent to all workspace ids held in the index of ``workspaces``.
    The primary result is returned indexed by its ``id`` column, so the query
    must produce a column named ``id``.
    """
    workspace_ids = workspaces.index.to_list()
    response = monitor_logs.query_dataframe(query, workspace_ids)
    return response.primary_result.set_index('id')

# Billable Usage quantity per workspace (TenantId) for the yesterday window
# (begin exclusive, end inclusive), summed and divided by 1000 into lastFullDayGB.
# NOTE(review): the /1000 presumably converts MB to GB - confirm the unit of Usage.Quantity.
# TenantId is renamed to id because la_query indexes its result on an `id` column.
df_lfd_volume = la_query(f"""
Usage
| where TimeGenerated > {serialize_to_kql(yesterday_begin)} and TimeGenerated <= {serialize_to_kql(yesterday_end)}  
| where IsBillable == true
| summarize lastFullDayGB = sum(Quantity) / 1000 by TenantId
| project-rename id = TenantId
""")

In [None]:
# Median daily billable volume per workspace over the thirty-day window.
# The first summarize totals Quantity / 1000 per workspace per 1-day bin; the
# second takes the 50th percentile of those daily totals as medianDayGB.
# NOTE(review): days with no billable Usage rows presumably yield no bin and so
# are left out of the median rather than counted as zero - confirm this is intended.
df_30d_volume = la_query(f"""
Usage
| where TimeGenerated > {serialize_to_kql(thirty_begin)} and TimeGenerated <= {serialize_to_kql(thirty_end)}  
| where IsBillable == true
| summarize fullDayGB = sum(Quantity) / 1000 by TenantId, bin(TimeGenerated, 1d)
| summarize medianDayGB = percentile(fullDayGB, 50) by TenantId 
| project-rename id = TenantId
""")

In [None]:
# Number of distinct SourceComputerId values per workspace that produced a
# Heartbeat record within the yesterday window, exposed as nodesReporting.
# The first summarize de-duplicates (SourceComputerId, TenantId) pairs; the
# second counts the remaining rows per workspace.
df_lfd_nodes = la_query(f"""
Heartbeat
| where TimeGenerated > {serialize_to_kql(yesterday_begin)} and TimeGenerated <= {serialize_to_kql(yesterday_end)}  
| summarize by SourceComputerId, TenantId
| summarize nodesReporting = count() by TenantId
| project-rename id = TenantId 
""")

# Pull Cost Data

In [None]:
# Lower-cased ARM resource ids of the selected workspaces, used to scope the cost query.
workspace_resource_ids = workspaces['resourceId'].tolist()

In [None]:
# Limit the cost query to the selected workspaces.
query_filter = create_basic_filter(resource_ids=workspace_resource_ids)

# Daily cost grouped by resource, spanning thirtyday through yesterday.
cost_timespan = full_day_timespan(thirtyday, yesterday)
query = create_cost_query(
    cost_timespan,
    grouping=GroupByColumn("ResourceId"),
    filter=query_filter,
    granularity=GranularityType.daily,
)

In [None]:
# Execute the cost query against the context's default billing account.
# Later cells read the ResourceId, UsageDate and Cost columns of the result.
cost_df = query_cost_dataframe(context.default_billing_account, query)

In [None]:
# Select the Cost column *before* aggregating. Aggregating the whole frame and
# picking Cost afterwards also sums/medians every other column; pandas 2.x no
# longer drops non-numeric columns silently and can raise TypeError on them.
cost_by_resource = cost_df.groupby('ResourceId')['Cost']
total_cost = cost_by_resource.sum()
median_cost = cost_by_resource.median()

# Cost on the most recent usage date present in the returned data.
last_usage_date = cost_df.UsageDate.max()
lfd_cost = cost_df[cost_df.UsageDate == last_usage_date].set_index('ResourceId').Cost

# One row per resource id with the three cost measures side by side.
cost_agg_df = pd.DataFrame({
    'thirty_day_cost': total_cost,
    'thirty_day_median_cost': median_cost,
    'last_full_day_cost': lfd_cost,
})

# Report

In [None]:
# Usage measures are indexed by workspace id, matching the index of `workspaces`.
usage_frames = [df_lfd_volume, df_30d_volume, df_lfd_nodes]

# Cost measures are indexed by resource id, so they are joined on the resourceId column.
full = (
    workspaces
    .join(usage_frames)
    .join(cost_agg_df, on='resourceId')
    # Last full day's cost divided by the number of nodes reporting that day.
    .assign(periodAvgCostPerNode=lambda frame: frame.last_full_day_cost / frame.nodesReporting)
)

In [None]:
# Order rows so workspaces that have a lastFullDayGB value come first (the sort
# key replaces that column with its isna() flag), then by subscription name.
report = full.sort_values(
    ['lastFullDayGB', 'subscriptionName'],
    key=lambda x: pd.isna(x) if x.name == 'lastFullDayGB' else x,
)

# Hide the index and the resourceId column. Styler.hide_index/hide_columns were
# deprecated in pandas 1.4 and removed in 2.0 in favour of Styler.hide(axis=...),
# so use whichever API this pandas version provides.
report_styler = report.style
if hasattr(report_styler, 'hide'):
    report_styler = report_styler.hide(axis='index').hide(axis='columns', subset=['resourceId'])
else:
    report_styler = report_styler.hide_index().hide_columns('resourceId')

# Render the cost columns (thirty_day_cost through periodAvgCostPerNode) as currency.
report_styler.format('${:,.2f}', na_rep='N/A', subset=pd.IndexSlice[:, 'thirty_day_cost':'periodAvgCostPerNode'])