In [4]:
import requests
import pandas as pd
import time
import json

def process_vulnerability(vuln_item):
    """
    Flatten one entry of an NVD "vulnerabilities" list into a single dict.

    The returned dict holds, in this order: the top-level CVE fields, one
    ``description_<lang>`` entry per description, the fields of the first
    ``cvssMetricV2`` metric (only when one is present), and three list-valued
    entries: ``weaknesses``, ``configurations`` and ``references``.
    Missing fields come back as ``None``; missing collections as empty lists.
    """
    cve = vuln_item.get("cve", {})

    # Top-level CVE attributes; only the id is renamed on the way out.
    record = {"cve_id": cve.get("id")}
    for field in ("sourceIdentifier", "published", "lastModified", "vulnStatus"):
        record[field] = cve.get(field)

    # One column per description language (a repeated language overwrites the earlier value).
    for entry in cve.get("descriptions", []):
        record[f"description_{entry.get('lang')}"] = entry.get("value")

    # CVSS v2 only, and only the first metric in the list.
    v2_metrics = cve.get("metrics", {}).get("cvssMetricV2", [])
    if v2_metrics:
        primary = v2_metrics[0]
        cvss = primary.get("cvssData", {})
        record["cvss_version"] = cvss.get("version")
        for field in (
            "vectorString",
            "baseScore",
            "accessVector",
            "accessComplexity",
            "authentication",
            "confidentialityImpact",
            "integrityImpact",
            "availabilityImpact",
        ):
            record[field] = cvss.get(field)
        # These three live on the metric entry itself, not inside cvssData.
        for field in ("baseSeverity", "exploitabilityScore", "impactScore"):
            record[field] = primary.get(field)

    # Each weakness description becomes "source|type|lang:value".
    record["weaknesses"] = [
        f"{item.get('source')}|{item.get('type')}|{text.get('lang')}:{text.get('value')}"
        for item in cve.get("weaknesses", [])
        for text in item.get("description", [])
    ]

    # One dict per CPE match, tagged with the operator/negate of its parent node.
    record["configurations"] = [
        {
            "operator": node.get("operator"),
            "negate": node.get("negate"),
            "criteria": match.get("criteria"),
            "vulnerable": match.get("vulnerable"),
            "matchCriteriaId": match.get("matchCriteriaId"),
        }
        for group in cve.get("configurations", [])
        for node in group.get("nodes", [])
        for match in node.get("cpeMatch", [])
    ]

    # Keep only the URL and source of each reference.
    record["references"] = [
        {"url": link.get("url"), "source": link.get("source")}
        for link in cve.get("references", [])
    ]

    return record

def fetch_all_nist_data(api_key=None, results_per_page=2000, timeout=30,
                        delay_seconds=6.0, max_retries=3):
    """
    Fetches all vulnerabilities from the NIST API using pagination.
    Returns a list of dictionaries where each dictionary represents
    a vulnerability with its processed details.

    Args:
        api_key: Optional NVD API key, sent in the ``apiKey`` request header.
        results_per_page: Page size requested from the API.
        timeout: Seconds to wait on each HTTP request before giving up on it,
            so a stalled connection cannot hang the download indefinitely.
        delay_seconds: Pause between successive page requests. The default
            follows NVD's guidance for unauthenticated clients; callers with
            an API key may lower it.
        max_retries: Attempts per page before the download is abandoned.

    Returns:
        The records processed so far. If a page cannot be fetched after
        ``max_retries`` attempts, the partial list is returned rather than
        raising, matching the best-effort behaviour of earlier versions.
    """
    api_url = "https://services.nvd.nist.gov/rest/json/cves/2.0"
    headers = {"apiKey": api_key} if api_key else None
    attempts = max(1, max_retries)
    start_index = 0
    all_results = []
    total_results = None

    while True:
        params = {
            "resultsPerPage": results_per_page,
            "startIndex": start_index,
        }
        print(f"Fetching records starting at index {start_index} ...")

        # Fetch one page, retrying transient failures (network errors,
        # rate-limit/server responses, malformed bodies) with a growing pause.
        data = None
        for attempt in range(1, attempts + 1):
            try:
                response = requests.get(
                    api_url, params=params, headers=headers, timeout=timeout
                )
            except requests.RequestException as exc:
                print("Error fetching data:", exc)
            else:
                if response.status_code == 200:
                    try:
                        data = response.json()
                    except ValueError as exc:
                        print("Error decoding response:", exc)
                    else:
                        break
                else:
                    print("Error fetching data:", response.status_code)
            if attempt < attempts:
                time.sleep(delay_seconds * attempt)

        if data is None:
            print(f"Giving up after {attempts} attempts; returning partial results.")
            break

        if total_results is None:
            total_results = data.get("totalResults", 0)
            print(f"Total results to fetch: {total_results}")

        vulnerabilities = data.get("vulnerabilities", [])
        if not vulnerabilities:
            print("No more vulnerabilities returned by API.")
            break

        all_results.extend(process_vulnerability(item) for item in vulnerabilities)

        # Advance by what was actually returned so a short page never skips records.
        start_index += len(vulnerabilities)
        print(f"Fetched {len(all_results)} records so far.")

        if start_index >= total_results:
            break

        # Delay to prevent overwhelming the API (skipped after the final page).
        time.sleep(delay_seconds)

    return all_results

def main():
    """
    Download all NVD vulnerabilities and export them to a CSV file.

    Calls ``fetch_all_nist_data()``, loads the processed records into a
    DataFrame and writes it to ``nist_vulnerabilities.csv`` in the current
    working directory. The list-valued columns (``weaknesses``,
    ``configurations``, ``references``) are stored as JSON strings so they
    can be read back with ``json.loads``. If nothing was fetched, no file is
    written, so an existing export is not replaced by an empty one.
    """
    # Fetch the data from the NIST API.
    all_data = fetch_all_nist_data()
    print(f"Total vulnerabilities processed: {len(all_data)}")

    output_filename = "nist_vulnerabilities.csv"
    if not all_data:
        print(f"No vulnerabilities fetched; {output_filename} was not written.")
        return

    # Convert processed data into a DataFrame.
    df = pd.DataFrame(all_data)

    # Nested lists would otherwise be written as Python reprs (single-quoted,
    # True/None), which are not valid JSON; serialize them explicitly.
    for column in ("weaknesses", "configurations", "references"):
        if column in df.columns:
            df[column] = df[column].apply(
                lambda value: json.dumps(value, ensure_ascii=False)
            )

    # Save the DataFrame to a CSV file.
    df.to_csv(output_filename, index=False)
    print(f"Data saved to {output_filename}")

# Entry point: run main() only when this code is executed directly
# (__name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()

Fetching records starting at index 0 ...


KeyboardInterrupt: 