# api

This is the primary interface for running squ wrappers.

In [None]:
#| default_exp api

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import pandas, json, logging
from nbdev_squ.core import *
from diskcache import memoize_stampede
from concurrent.futures import ThreadPoolExecutor
from importlib.resources import path
from subprocess import run

In [None]:
#| export
# basicConfig() only configures the root logger and returns None, so the
# module logger has to be fetched separately with getLogger().
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

## List Workspaces

The `list_workspaces` function retrieves a list of workspaces from blob storage and returns it in various formats.

In [None]:
#| exports
@memoize_stampede(cache, expire=60 * 60 * 3) # cache for 3 hours
def list_workspaces(fmt: str = "df", # df, csv, json, list
                    agency: str = "ALL"): # Agency alias or ALL
    """Return the Sentinel workspace list joined with its SecOps group details.

    Reads `SentinelWorkspaces.csv` and `SecOps Groups.csv` from the
    `notebooks/lists` folder under `datalake_path()`, joins them on the
    SecOps group alias, drops rows without a `customerId` and sorts by alias.

    Returns a DataFrame (`df`), a CSV string (`csv`), a list of record dicts
    with missing values replaced by "" (`json`), or a list of the unique
    `customerId` values (`list`).

    Raises `ValueError` when `fmt` is not one of the supported formats.
    """
    # Reject an unsupported format before doing any I/O.
    if fmt not in ("df", "csv", "json", "list"):
        raise ValueError("Invalid format")
    root = datalake_path()
    # Use context managers so both file handles are closed once parsed.
    with (root / "notebooks/lists/SentinelWorkspaces.csv").open() as workspaces_file:
        df = pandas.read_csv(workspaces_file)
    with (root / "notebooks/lists/SecOps Groups.csv").open() as groups_file:
        groups = pandas.read_csv(groups_file).set_index("Alias")
    df = df.join(groups, on="SecOps Group", rsuffix="_secops")
    df = df.rename(columns={"SecOps Group": "alias", "Domains and IPs": "domains"})
    df = df.dropna(subset=["customerId"]).sort_values(by="alias")
    if agency != "ALL":
        df = df[df["alias"] == agency]
    if fmt == "df":
        return df
    if fmt == "csv":
        return df.to_csv()
    if fmt == "json":
        return df.fillna("").to_dict("records")
    # Only "list" remains after the up-front validation.
    return list(df["customerId"].unique())

In [None]:
# Preview the first rows of the default ("df") workspace listing.
list_workspaces().head()

# Log Analytics Query
The function below makes it easy to query all workspaces with Sentinel installed using Log Analytics.

In [None]:
#| exports
@memoize_stampede(cache, expire=60 * 60 * 3) # cache for 3 hours
def list_subscriptions():
    "Unique `id` values from the result of `azcli(['account', 'list'])`."
    accounts = pandas.DataFrame(azcli(["account", "list"]))
    subscription_ids = accounts["id"]
    return subscription_ids.unique()

@memoize_stampede(cache, expire=60 * 60 * 3) # cache for 3 hours
def list_securityinsights():
    "DataFrame of workspaces that have a `SecurityInsights*` solution, from a resource graph query."
    # Find SecurityInsights solutions, then join the workspace each one points
    # at (matched on the lower-cased resource id) to expose its customerId.
    kql = """
        resources
        | where type =~ 'microsoft.operationsmanagement/solutions'
        | where name startswith 'SecurityInsights'
        | project wlid = tolower(tostring(properties.workspaceResourceId))
        | join kind=leftouter (
            resources | where type =~ 'microsoft.operationalinsights/workspaces' | extend wlid = tolower(id))
            on wlid
        | extend customerId = properties.customerId
        """
    response = azcli(["graph", "query", "--first", "1000", "-q", kql])
    rows = response["data"]
    return pandas.DataFrame(rows)

def loganalytics_query(query: str):
    """Run `query` against every workspace from `list_securityinsights()` and combine the results.

    One `azcli monitor log-analytics query` call is submitted per workspace
    `customerId` on a thread pool. A workspace whose query raises is logged
    as a warning and skipped. Results lacking a `TenantId` column get one
    filled with the workspace's `customerId`.

    Returns the concatenated DataFrame, or an empty DataFrame when no
    workspace produced a result.
    """
    # Look the logger up by name so a failed query can always be reported.
    log = logging.getLogger(__name__)
    dfs = []
    customerids = list_securityinsights()["customerId"]
    with ThreadPoolExecutor(max_workers=32) as executor:
        futures = [executor.submit(azcli, [
            "monitor", "log-analytics", "query",
            "-w", workspace,
            "--analytics-query", query
        ]) for workspace in customerids]
        # Futures were submitted in customerids order, so zip pairs each
        # result with the workspace it came from.
        for future, customerid in zip(futures, customerids):
            try:
                df = pandas.DataFrame(future.result())
            except Exception as e:
                # Best effort: one failing workspace must not abort the rest.
                log.warning(e)
                continue
            if "TenantId" not in df.columns:
                df["TenantId"] = customerid
            dfs.append(df)
    if not dfs:
        # pandas.concat raises ValueError on an empty list.
        return pandas.DataFrame()
    return pandas.concat(dfs)

def query_all(query: str, fmt="df"):
    """Run `query` through `loganalytics_query` and return it as `df`, `csv` or `json`.

    `json` yields a list of record dicts with missing values replaced by "".
    Any other `fmt` raises `ValueError` (after the query has run).
    """
    result = loganalytics_query(query)
    if fmt == "df":
        return result
    if fmt == "csv":
        return result.to_csv()
    if fmt == "json":
        records = result.fillna("").to_dict("records")
        return records
    raise ValueError("Invalid format")

In [None]:
# Display the workspaces returned by the resource graph query.
list_securityinsights()

In [None]:
# Across all workspaces: take incidents classified TruePositive in the last
# 45 days, expand their alert ids, join to SecurityAlert, expand the alert
# entities and keep the distinct non-empty entity addresses.
df = query_all("""
SecurityIncident
| where TimeGenerated > ago(45d)
| where Classification == "TruePositive"
| mv-expand AlertIds
| project tostring(AlertIds)
| join SecurityAlert on $left.AlertIds == $right.SystemAlertId
| mv-expand todynamic(Entities)
| project Entities.Address
| where isnotempty(Entities_Address)
| distinct tostring(Entities_Address)
""")

In [None]:
# (rows, columns) of the combined query result.
df.shape

In [None]:
def hunt(iocs: list[str]):
    """Count hits per table for the given indicators over the last 14 days.

    Builds a KQL `search` for any of `iocs` (quoted and OR-ed together),
    prints the query, and runs it through `query_all`.

    Raises `ValueError` when `iocs` is empty, since that would search for
    the empty string.
    """
    # Materialise once so any iterable (e.g. a numpy array) can be length
    # checked without ambiguous truthiness.
    iocs = list(iocs)
    if not iocs:
        raise ValueError("iocs must contain at least one indicator")
    hunt_text = '" or "'.join(iocs)
    hunt_text = f'"{hunt_text}"'
    if len(iocs) > 1:
        # Parenthesise so the trailing `and TimeGenerated` applies to every term.
        hunt_text = f"({hunt_text})"
    query = f'search {hunt_text} and TimeGenerated > ago(14d) | summarize hits = count() by $table'
    print(query)
    return query_all(query)

# hunt(["91.191.209.190", "196.216.136.139", "66.203.112.86"])

In [None]:
# Hunt for the first 100 distinct addresses returned by the query above.
hunt(df.Entities_Address.unique()[:100])

In [None]:
#| exports

def atlaskit_transformer(inputtext, # Text to convert
                         inputfmt="md", # Format of `inputtext`
                         outputfmt="wiki", # Format to convert to
                         runtime="node", # Executable used to run the transformer script
                         transformer=None): # Script path; defaults to the bundled atlaskit transformer
    """Convert `inputtext` between formats by running the atlaskit transformer script.

    Runs `runtime transformer inputfmt outputfmt` with `inputtext` on stdin
    and returns the script's stdout. Raises `subprocess.CalledProcessError`
    if the script exits with a non-zero status.
    """
    def _convert(script):
        return run([runtime, script, inputfmt, outputfmt], input=inputtext, text=True, capture_output=True, check=True).stdout

    if transformer is not None:
        return _convert(transformer)
    # importlib.resources.path() returns a context manager, not a Path, so the
    # bundled script is resolved here at call time and used inside the `with`
    # block, where the file is guaranteed to exist on disk.
    # NOTE(review): package name "squ" is kept from the original default - confirm it is importable.
    with path("squ", "atlaskit-transformer.bundle.js") as bundled:
        return _convert(bundled)

In [None]:
print(atlaskit_transformer("""# Heading 1

- a bullet
- [a link](https://github.com)
"""))

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()