In [2]:
import pandas as pd
import boto3
import json
import os

In [3]:
os.chdir("/Users/smitht61/Desktop/cyber/inspector-remediator/")

#### Overview
This notebook is part of developing the `inspector-remediator` package. The package is designed to query package vulnerabilities on EC2 instances and leverage an LLM chosen by the user to recommend remediation. This notebook was used to develop several functions to get AWS Inspector findings and normalize findings as LLM input.

#### Connect to AWS Inspector
The following expects credentials in `~/.aws/credentials` under `[default]`.

In [4]:
client = boto3.client('inspector2')

#### Get Findings
This function lists active findings for packages vulnerabilities on EC2 instances.

In [5]:
def get_findings(client, status="ACTIVE"):
    """
    Query vulnerability findings from AWS Inspector2.

    Args:
        client (botocore.client.BaseClient):
            A boto3 Inspector2 client.
        status(str, optional):
            Filter by `findingStatus` in client.list_findings(). Default
            filters to active findings.

    Returns:
        list: A list of Inspector2 findings.
    """

    findings = []
    next_token = None

    finding_filter = {
        "findingStatus": [
            {
                "comparison": "EQUALS",
                "value": status
            }
        ],
        "findingType": [
            {
                "comparison": "EQUALS",
                "value": "PACKAGE_VULNERABILITY"
            }
        ],
        "resourceType": [
            {
                "comparison": "EQUALS",
                "value": "AWS_EC2_INSTANCE"
            }
        ]
    }
    
    while True:
        if next_token:
            response = client.list_findings(
                filterCriteria=finding_filter,
                nextToken=next_token
            )
        else:
            response = client.list_findings(
                filterCriteria=finding_filter
            )
    
        findings.extend(response.get("findings", []))
        next_token = response.get("nextToken")
    
        if not next_token:
            break
    
    return findings

In [6]:
active_findings = get_findings(client)

In [7]:
for f in active_findings:
    print(f["title"])

CVE-2025-39964 - linux-image-aws
CVE-2024-36357 - amd64-microcode
CVE-2024-36350 - amd64-microcode
CVE-2025-39993 - linux-image-aws
CVE-2025-40018 - linux-image-aws
CVE-2025-39946 - linux-image-aws


#### Normalize Finding Details for LLM
The following function extracts relevant fields from a finding and returns them as a dictionary, which is easily convertible to JSON for LLM input.

In [28]:
def normalize_finding_for_llm(finding, region=None):
    """
    Normalize an Inspector2 finding for LLM input.

    Args:
        finding (dict):
            A single Inspector2 finding contained in output from
            `get_active_findings`.

    Returns:
        dict:
            - finding_arn
            - cve
            - title
            - description
            - severity
            - resource
            - package
            - references
    """
    # finding
    pvd = finding.get("packageVulnerabilityDetails", {}) or {}
    vuln_id = pvd.get("vulnerabilityId")

    # resource
    resources = finding.get("resources") or []
    r0 = resources[0] if resources else {}
    r_details = r0.get("details", {}) or {}
    ec2 = r_details.get("awsEc2Instance", {}) or {}

    real_account_id = finding.get("awsAccountId")
    real_instance_id = r0.get("id") or ec2.get("instanceId")

    # package
    vulnerable_packages = pvd.get("vulnerablePackages") or []
    pkg = vulnerable_packages[0] if vulnerable_packages else {}
    
    # references
    refs = pvd.get("referenceUrls") or []
    remediation = finding.get("remediation") or {}
    remediation_url = remediation.get("url")
    if remediation_url and remediation_url not in refs:
        refs.append(remediation_url)

    normalized = {
        "finding_arn": finding.get("findingArn"),
        "cve": vuln_id,
        "title": finding.get("title"),
        "description": finding.get("description"),
        "severity": finding.get("severity"),
        
        "resource": {
            "account_id": real_account_id,
            "region": r0.get("region"),
            "instance_id": real_instance_id,
            "os": ec2.get("platformDetails") or ec2.get("platform")
        },
        
        "package": {
            "name": pkg.get("packageName"),
            "installed_version": pkg.get("installedVersion") or pkg.get("version"),
            "fixed_version": pkg.get("fixedVersion") or pkg.get("fixedInVersion"),
        },
        
        "references": refs,
    }

    return normalized

In [29]:
llm_payload = [normalize_finding_for_llm(f) for f in active_findings]

In [30]:
print(f"Total findings in payload: {len(llm_payload)}\n")
print("First finding in payload:\n")
print(json.dumps(llm_payload[0], indent=2))

Total findings in payload: 6

First finding in payload:

{
  "finding_arn": "arn:aws:inspector2:us-west-2:675895556060:finding/488b9a1906981fa14dcd297cfc8dbdfb",
  "cve": "CVE-2025-39964",
  "title": "CVE-2025-39964 - linux-image-aws",
  "description": "In the Linux kernel, the following vulnerability has been resolved: crypto: af_alg - Disallow concurrent writes in af_alg_sendmsg Issuing two writes to the same af_alg socket is bogus as the data will be interleaved in an unpredictable fashion. Furthermore, concurrent writes may create inconsistencies in the internal socket state. Disallow this by adding a new ctx->write field that indiciates exclusive ownership for writing.",
  "severity": "MEDIUM",
  "resource": {
    "account_id": "675895556060",
    "region": "us-west-2",
    "instance_id": "i-0203a0c192fab5fa2",
    "os": "UBUNTU_24_04"
  },
  "package": {
    "name": null,
    "installed_version": "6.14.0",
    "fixed_version": "0:6.14.0-1018.18~24.04.1 (pending)"
  },
  "referenc

In [None]:
import json
import boto3

BUCKET_NAME = "inspector-remediation"

def lambda_handler(event, context):
    s3 = boto3.client("s3")

    finding = event.get("detail", {})
    title = finding.get("title", "unknown_finding")

    object_key = f"{title}.json"

    try:
        s3.put_object(
            Bucket=BUCKET_NAME,
            Key=object_key,
            Body=json.dumps(finding, indent=2),
            ContentType="application/json"
        )
        print(f"Successfully uploaded {object_key} to s3://{BUCKET_NAME}")
        return {"status": "success", "key": object_key}

    except Exception as e:
        print(f"Error uploading object: {e}")
        raise