In [None]:
from IPython.display import Markdown, SVG, FileLink, HTML, Javascript, Image, display_png
from inspect import cleandoc
import json, os, base64, datetime
import pandas as pd
import numpy as np
from pathlib import Path
import plotly.express as px
import plotly.io as pio
# Static plotly exports produced through kaleido default to SVG.
pio.kaleido.scope.default_format = "svg"

# Report parameters are read from the environment, with fallbacks for ad-hoc runs.
agency = os.getenv("agency", False)
agency_name = os.getenv("agency_name", "Sample Agency")
report_date = os.getenv("report_date", datetime.datetime.now().strftime("%d %B %Y"))

# Directory (relative to the working directory) that receives the generated CSVs.
outputs = Path("files")
outputs.mkdir(parents=True, exist_ok=True)

# Inputs live under ~/datalake: the services export and the cover logo image.
datalake = Path.home().joinpath("datalake")
services_json = datalake.joinpath("rumble_outputs", "services-latest.json")
logo = Image(datalake.joinpath("notebooks", "style-images", "dpclogo.png"), width=400, alt="coverlogo")

In [None]:
# Stylesheet for the cover page: page size, fonts, and the tinted logo/cover panels.
cover_styles = """
<style>
    @page {
        size: A4 portrait;
    }
    .jp-Cell { padding: 0px; }
    html body .jp-RenderedHTMLCommon {
      font-family: Arial;
    }
    span.title {
      color: #007a8a;
      font-size: 56px;
      font-weight: bold;
    }
    span.subtitle {
      color: #595959;
      font-size: 20px;
    }
    div#coverlogo {
      background-color: #cce6ea;
      padding: 50px;
    }
    div#coverpage {
      display: flex;
      align-items: center;
      height: 700px;
      padding: 50px;
      background-color: #cce6ea;
      margin-right: -20px;
    }
</style>"""

# Cover markup with the agency name and report date filled in. The trailing script
# walks up from the cover div to find the logo <img> rendered by this same cell and
# tags its parent with id "coverlogo" so the stylesheet above applies to it.
cover_markup = f"""
<div id="coverpage">
    <div>
        <span class="title">External Footprint Report (DRAFT)</span><br>
        <span class="subtitle">{agency_name} - {report_date}</span><br>
        <span class="subtitle"><b>WA Security Operations Centre</b></span>
    </div>
</div>
<script>
document.getElementById("coverpage").parentElement.parentElement.parentElement.getElementsByTagName("img")[0].parentElement.id = "coverlogo"
</script>
"""

display(logo, HTML(cover_styles + cover_markup))

In [None]:
# Load the latest services export; the context manager closes the file handle once parsed.
with services_json.open() as services_file:
    services = pd.json_normalize(json.load(services_file)).drop(columns="agent_name")

# Optionally restrict to sites whose name matches `agency` (case-insensitive).
# NOTE: str.contains interprets `agency` as a regular expression.
# na=False skips rows without a site_name instead of failing on a NaN-containing mask.
if agency:
    in_scope = services[services["site_name"].str.contains(agency, case=False, na=False)]
else:
    in_scope = services

# Empty or whitespace-only strings become NaN so missing values are represented one way.
# (The pattern needs `\s`; a bare `s` would blank out values made of the letter "s" instead.)
df_full = in_scope.sort_values(by=['service_address']).replace(r'^\s*$', float('NaN'), regex=True)

# Protocol versions treated as weak; dots are escaped so they only match a literal ".".
weak_tls_pattern = r"SSL|TLSv1\.0|TLSv1\.1"

# Build the high-risk subset. Comparisons against True/False are deliberate: rows where
# the column is NaN (no HTTP redirect / no TLS data) compare unequal and are therefore kept.
df = df_full[df_full["service_transport"] != "icmp"] # Ignore ICMP responses
df = df[df["service_data.http.head.location"].str.contains("https:") != True] # Ignore redirects to secure locations
df = df.drop(df[df["attributes.tls.supportedVersionNames"].str.contains(weak_tls_pattern) == False].index) # Ignore secure TLS sites
# The membership tests below assume each `names` cell is a list of hostnames - TODO confirm against the export schema.
df = df.drop(df[df['names'].apply(lambda x: 'OUTLOOK.OFFICE.COM' in x)].index) # Ignore Exchange Online
df = df.drop(df[df['names'].apply(lambda x: 'MANAGE.MICROSOFT.COM' in x)].index) # Ignore intune management endpoint

# Drop columns that carry no data at all, then publish both datasets as CSV attachments.
df = df.dropna(how='all', axis=1)
df_full = df_full.dropna(how='all', axis=1)
df_full.to_csv(outputs/"exposed.csv", index=False)
df.to_csv(outputs/"high_risk.csv", index=False)

In [None]:
def _count_names(frame):
    """Count distinct entries across the list-valued `names` column of `frame`.

    Uses a single set union instead of summing the lists (which re-copies the
    accumulated list for every row and returns 0, not a list, on an empty column).
    Returns 0 when the column is absent.
    """
    return len(set().union(*frame.get("names", [])))


def _count_unique(frame, column, as_text=False):
    """Count distinct non-null values in `column`, or 0 if `frame` has no such column.

    `as_text=True` stringifies values first, for columns whose values may not be hashable.
    """
    if column not in frame.columns:
        return 0
    values = frame[column]
    return (values.map(str) if as_text else values).nunique()


all_domains = _count_names(df_full)
all_ips = _count_unique(df_full, "service_address")
risky_domains = _count_names(df)
risky_ips = _count_unique(df, "service_address")
risky_ports = _count_unique(df, "service_port")
risky_protocols = _count_unique(df, "service_protocol", as_text=True)

Markdown(cleandoc(f"""
    # {agency_name} external footprint report as of {report_date}.
    ## Methodology
    There were **{all_domains}** active domains and **{all_ips}** active IPs found by enumerating the agencies owned domains using [OWASP/Amass](https://github.com/OWASP/Amass). These were then filtered as follows:
    - ICMP responses alone were ignored
    - Redirects to secure (https) endpoints were ignored
    - HTTPS sites supporting secure versions of TLS (TLSv1.2 or higher) were ignored
    - CNAME's to Exchange Online and Defender/Intune management endpoints were ignored

    The remaining exposed services are high risk and should be targeted for decommissioning or network level constraints.

    ## Attached CSVs
    The HTML version of this report allows for direct downloads, otherwise please refer to the attached CSVs for the full dataset behind this report.

    - [All discovered services](files/exposed.csv) - `exposed.csv`
    - [High risk services](files/high_risk.csv) - `high_risk.csv`

    ## High risk internet facing services
    **{df.shape[0]}** services were found across **{risky_domains}** domains and **{risky_ips}** IPs. There were **{risky_ports}** TCP/UDP ports and **{risky_protocols}** protocols found in use. The below summaries show the first 5 weak services discovered for some key areas, for the full scope of impact please refer to the attached CSVs.
"""))

In [None]:
def _services_on_ports(*ports):
    """Return the rows of `df` whose `service_port` is one of `ports`."""
    return df[df.service_port.isin(list(ports))]


# Split the high-risk services into categories by well-known port.
rdp_services = _services_on_ports(3389)
smtp_services = _services_on_ports(25, 465, 587)
cifs_services = _services_on_ports(139, 445)
tftp_services = _services_on_ports(69)
dns_services = _services_on_ports(53)
ftp_services = _services_on_ports(21)
ssh_services = _services_on_ports(22)

# Weak TLS covers SSL, TLSv1.0 and TLSv1.1 - the same set the high-risk filter treats as
# insecure - so a service offering only SSL/TLSv1.0 is reported too, not just TLSv1.1.
tls_versions_column = "attributes.tls.supportedVersionNames"
weak_tls_pattern = r"SSL|TLSv1\.0|TLSv1\.1"
if tls_versions_column in df.columns:
    # na=False: rows without TLS data are not weak-TLS services.
    weaktls_services = df[df[tls_versions_column].str.contains(weak_tls_pattern, na=False)]
else:
    # The column is absent when no remaining service reported TLS versions
    # (all-NaN columns are dropped from `df`), so there is nothing to flag.
    weaktls_services = df.iloc[0:0]

service_groups = {
    "RDP": rdp_services,
    "SMTP": smtp_services,
    "CIFS": cifs_services,
    "TFTP": tftp_services,
    "DNS": dns_services,
    "FTP": ftp_services,
    "SSH": ssh_services,
    "WEAK_TLS": weaktls_services,
}
# Number of services (non-null service_id values) per category.
stats = pd.Series({label: group.service_id.count() for label, group in service_groups.items()})
stats = stats[stats > 0] # hide empty services
stats = pd.DataFrame({'a' : stats.index, 'b': stats.values})
fig = px.pie(stats, names="a", values="b", title="High Risk Services by protocol")
SVG(fig.to_image(width=700))

In [None]:
def _risk_section(guidance, services, columns):
    """Render one warning section as markdown.

    The section is the (dedented) `guidance` text followed by a table of the first
    five rows of `services`, limited to `columns`. `reindex` is used for the column
    selection so that a column missing from `services` shows up as an empty (NaN)
    column instead of raising KeyError.
    """
    table = services.reindex(columns=columns).head(5).to_markdown(index=False)
    return cleandoc(guidance) + "\n\n" + table


# One entry per service category: (matching services, table columns, guidance markdown).
risk_sections = [
    (rdp_services, ["service_address", "service_port", "service_vhost", "os"], """
        #### High Risk: RDP services were found
        **Mitigate this risk**
        - For staff day to day use, [RDWeb with Azure Active Directory Application Proxy](https://docs.microsoft.com/en-us/azure/active-directory/app-proxy/application-proxy-integrate-with-remote-desktop-services) is a secure mechanism to RDP resources over the internet.
        With the pre-authentication flow you can use Azure AD authentication features like single sign-on, Conditional Access, and two-step verification for your on-premises resources.
        You also ensure that only authenticated traffic reaches your network.
        - For administrator or vendor remote access [Azure Bastion](https://docs.microsoft.com/en-au/azure/bastion/quickstart-host-portal) or a self managed bastion service is a secure way to provide remote access to private network RDP or SSH resources without exposing high risk endpoints.
    """),
    (smtp_services, ["service_address", "service_port", "names"], """
        #### High Risk: SMTP services were found
        **Mitigate this risk** - Use an [Edge Transport Server](https://docs.microsoft.com/en-us/exchange/edge-transport-servers#mail-flow-with-an-edge-transport-server) if you are running a hybrid exchange environment to remove the requirement for any on-premise internet facing services.
        3rd party mail filtering should be decommissioned where possible.
    """),
    (cifs_services, ["service_address", "service_port", "names", "os"], """
        #### Extreme Risk: Internet facing CIFS services were found
        **Mitigate this risk** - Migrate your fileshares to [Microsoft 365 (Sharepoint Online)](https://docs.microsoft.com/en-us/sharepointmigration/mm-get-started) or [Azure Blob Storage (Storage Explorer)](https://docs.microsoft.com/en-us/azure/vs-azure-tools-storage-explorer-blobs) for secure collaboration over the internet.
    """),
    (tftp_services, ["service_address", "service_port", "names", "os"],
     """#### Extreme Risk: Internet facing TFTP services were found"""),
    (dns_services, ["service_address", "service_port", "names"], """
        #### High Risk: Internet facing DNS services were found
        **Mitigate this risk** - Migrate your public dns zones to a public cloud provider like [Azure DNS](https://docs.microsoft.com/en-au/azure/dns/dns-delegate-domain-azure-dns) or [AWS Route 53](https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/dns-configuring.html) to avoid exposing internal DNS infrastructure.
    """),
    (ftp_services, ["service_address", "service_port", "names", "os"], """
        #### High Risk: Internet facing FTP services were found
        **Mitigate this risk** - Migrate your FTP sites to [Microsoft 365 (Sharepoint Online)](https://docs.microsoft.com/en-us/sharepointmigration/mm-get-started) or [Azure Blob Storage (Storage Explorer)](https://docs.microsoft.com/en-us/azure/vs-azure-tools-storage-explorer-blobs) for secure collaboration over the internet.
    """),
    (ssh_services, ["service_address", "service_port", "names"], """
        #### High Risk: Internet facing SSH services were found
        **Mitigate this risk**
        - For file transfers [Azure Blob Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/secure-file-transfer-protocol-support-how-to?tabs=azure-portal) can provide secure SFTP access to object storage without exposing internal servers.
        - For administrator or vendor remote access [Azure Bastion](https://docs.microsoft.com/en-au/azure/bastion/quickstart-host-portal) or a self managed bastion service is a secure way to provide remote access to private network RDP or SSH resources without exposing high risk endpoints.
    """),
    (weaktls_services, ["service_address", "service_port", "service_vhost"], """
        #### Medium Risk: TLS V1.1 or weaker services were discovered
        **Mitigate this risk** - Deploy a secure CDN/WAF service such as [Fastly](https://docs.fastly.com/products/fastly-next-gen-waf), [Azure Front Door](https://docs.microsoft.com/en-us/azure/frontdoor/end-to-end-tls) or [AWS Cloudfront](https://docs.aws.amazon.com/waf/latest/developerguide/cloudfront-features.html) in front of your HTTPS resources to enforce secure TLS 1.2+, monitor for suspicious and anomalous web traffic and protect against attacks directed at your applications and origin servers.
    """),
]

# Only categories that actually have matching services produce a section.
warnings = [
    _risk_section(guidance, services, columns)
    for services, columns, guidance in risk_sections
    if not services.empty
]

Markdown("\n".join(warnings))

In [None]:
# Section heading and lead-in sentence for the summary charts that follow.
general_stats_intro = (
    "## General Statistics\n"
    "The below charts summarise exposed services to assist with prioritising risk management."
)
Markdown(general_stats_intro)

In [None]:
# Services per port, keeping the ten ports with the MOST services.
# groupby().size() is ordered by port number, so head(10) alone would pick the ten
# lowest-numbered ports rather than the top ten promised by the chart title.
stats = df.groupby("service_port").size().nlargest(10)
# `stats` is an unnamed Series, so plotly exposes its values under the column label 0.
fig = px.pie(stats, names=stats.index, values=0, title="# of exposed risky services grouped by port (top 10)")
fig.update_traces(textinfo='value')
SVG(fig.to_image(width=700))

In [None]:
# Services per hostname, keeping the ten hostnames with the MOST services.
# groupby().size() is ordered alphabetically by hostname, so head(10) alone would pick
# the first ten names rather than the top ten promised by the chart title.
stats = df.groupby("service_vhost").size().nlargest(10)
# `stats` is an unnamed Series, so plotly exposes its values under the column label 0.
fig = px.pie(stats, names=stats.index, values=0, title="# of exposed risky services grouped by hostname (top 10)")
fig.update_traces(textinfo='value')
SVG(fig.to_image(width=700))

In [None]:
# Services per operating system, keeping the ten operating systems with the MOST services.
# groupby().size() is ordered alphabetically by OS name, so head(10) alone would pick
# the first ten names rather than the top ten promised by the chart title.
stats = df.groupby("os").size().nlargest(10)
# `stats` is an unnamed Series, so plotly exposes its values under the column label 0.
fig = px.pie(stats, names=stats.index, values=0, title="# of exposed risky services grouped by operating system (top 10)")
fig.update_traces(textinfo='value')
SVG(fig.to_image(width=700))