In [1]:
# CWE descriptions, parents, and children finder.
# This module scrapes CWE descriptions and parent/child relationships from the MITRE website and writes them to a master list (CSV file and database).

# imports
import csv, sys, json, requests, timeit, os, threading, time
import pandas as pd
from bs4 import BeautifulSoup
from csv import DictReader
from datetime import datetime

# global variables and other classes
# Output locations: folder (relative to the working directory), CSV file name, database name.
file_path = 'cwe_master'
file_name = 'cwe_master.csv'
db_name = 'cyvia_cwe_master'

# Numeric CWE ids to scrape; used as range(cwe_id_start, cwe_id_stop), so the stop value is excluded.
cwe_id_start = 1
cwe_id_stop = 2000

# Project helper object (file checks and database access).
import functions as fn
func = fn.functions()

# Console spinner displayed while records are being inserted.
import Spinner as sp
spinner = sp.Spinner()

# Lookup parent and child relations for CWEs
def _cwe_label(raw_id):
    """Format one ID cell of the relationship table as a 'CWE-<n>' string."""
    # A whole-number ID can arrive as a float (e.g. 74.0) when the column also
    # holds missing values; drop the spurious '.0' in that case.
    if isinstance(raw_id, float) and raw_id.is_integer():
        raw_id = int(raw_id)
    return 'CWE-' + str(raw_id).strip()


def lookup_cwe_relations(cwe_id):
    """Scrape the parent and child CWE ids of *cwe_id* from its MITRE page.

    Args:
        cwe_id: Identifier such as 'CWE-79'; the 'CWE-' prefix is removed to
            build the page URL.

    Returns:
        A ``(parents, children)`` tuple of lists of 'CWE-<n>' strings. Both
        lists are empty when the page yields fewer than 8 tables or when the
        relationship table does not expose integer-labelled columns 0 and 2.

    Raises:
        Whatever ``pandas.read_html`` raises, e.g. when the page cannot be
        fetched or contains no tables. Callers are expected to handle that.
    """
    lookup_url = 'https://cwe.mitre.org/data/definitions/' + cwe_id.replace('CWE-', '') + '.html'
    df_list = pd.read_html(lookup_url)
    parents, children = [], []

    # NOTE(review): the "fewer than 8 tables" guard and the fixed table index 4
    # presumably mirror MITRE's page layout at the time of writing - confirm.
    if len(df_list) < 8:
        return parents, children

    relation_table = df_list[4]
    try:
        # Column 0 holds the relation nature, column 2 the related CWE number.
        natures, target_ids = relation_table[0], relation_table[2]
    except KeyError:
        # Table was parsed with named (non-integer) columns: nothing to extract.
        return parents, children

    # Walk the cells row by row. This covers every row (the previous index
    # loop stopped one row early) and no longer depends on whitespace
    # tokenising of the rendered columns to keep rows aligned.
    for nature, target_id in zip(natures, target_ids):
        if pd.isna(target_id):
            continue
        nature = str(nature).strip()
        if nature == 'ChildOf':
            parents.append(_cwe_label(target_id))
        elif nature == 'ParentOf':
            children.append(_cwe_label(target_id))
    return parents, children
    
# Lookup CWE Descriptions... 
def lookup_cwe_description(cwe_id, timeout=30):
    """Return the heading text of the MITRE page for *cwe_id*.

    The text of the last ``<h2>`` element on the page is used, stripped of
    surrounding whitespace and of the 'CWE CATEGORY: ' marker.

    Args:
        cwe_id: Identifier such as 'CWE-79'; the 'CWE-' prefix is removed to
            build the page URL.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the caller indefinitely.

    Returns:
        The cleaned heading text.

    Raises:
        requests.exceptions.RequestException: The request failed, timed out,
            or the server answered with an error status.
        ValueError: The page contains no ``<h2>`` heading.
    """
    lookup_url = 'https://cwe.mitre.org/data/definitions/' + cwe_id.replace('CWE-', '') + '.html'
    reqs = requests.get(lookup_url, timeout=timeout)
    # Do not mistake the heading of an error page for a CWE name.
    reqs.raise_for_status()
    soup = BeautifulSoup(reqs.text, 'lxml')
    headings = soup.find_all('h2')
    if not headings:
        # Previously this case surfaced as an accidental UnboundLocalError.
        raise ValueError('No <h2> heading found on ' + lookup_url)
    return headings[-1].text.strip().replace('CWE CATEGORY: ', '')

# Write data to csv and db
def write_data():
    """Scrape every CWE id in the configured range and store it in CSV and DB.

    For each id in ``range(cwe_id_start, cwe_id_stop)`` the relations and the
    description are looked up; one CSV row is written and one document is put
    into the database. Two fixed NVD placeholder entries are written first.

    The scrape is best effort: an id whose lookup or insertion fails (most
    commonly because no page exists for that number) is skipped and counted,
    and the number of skipped ids is printed at the end.
    """
    # Check existing files and folders
    func.check_files(file_path)

    # connect database
    func.connect_db(db_name, True)  # create a new database

    # Build the path with os.path.join so it is valid on every platform,
    # not only where '\\' is the separator.
    csv_path = os.path.join(os.getcwd(), file_path, file_name)
    print('\nInserting records, please wait...')
    spinner.start()
    skipped = 0

    try:
        with open(csv_path, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            # write the header...
            writer.writerow(['_id', 'name', 'parents', 'children'])

            # Write 2 manual rows in csv
            writer.writerow(['NVD-CWE-noinfo', 'Insufficient Information', '[]', '[]'])
            writer.writerow(['NVD-CWE-Other', 'Other', '[]', '[]'])
            # Write 2 manual rows in db
            # NOTE(review): these store parents/children as the string '[]',
            # while scraped documents store real lists - confirm which form
            # consumers expect before unifying.
            func.db.put({"_id": 'NVD-CWE-noinfo', "name": 'Insufficient Information', "parents": '[]', "children": '[]'})
            func.db.put({"_id": 'NVD-CWE-Other', "name": 'Other', "parents": '[]', "children": '[]'})

            # CWE-IDs from web, start from CWE-<cwe_id_start>, stop before cwe_id_stop
            for i in range(cwe_id_start, cwe_id_stop):
                lookup_cwe = 'CWE-' + str(i)
                try:
                    parent, child = lookup_cwe_relations(lookup_cwe)
                    lookup_name = lookup_cwe_description(lookup_cwe).replace(lookup_cwe + ': ', '')
                    writer.writerow([lookup_cwe, lookup_name, json.dumps(parent), json.dumps(child)])
                    # Insert or update the document in the database
                    func.db.put({
                        "_id": lookup_cwe,
                        "name": lookup_name,
                        "parents": parent,
                        "children": child,
                    })
                except Exception:
                    # Deliberate per-id boundary: a missing page (HTTP 404), an
                    # unparsable page or a failed insert must not abort the
                    # whole run. The earlier handlers only achieved this by
                    # accident (an undefined HTTPError name turned every error
                    # into a NameError) and also swallowed Ctrl-C.
                    skipped += 1
    finally:
        # Always stop the spinner, even if something unexpected propagates.
        spinner.stop()

    print('Insertion complete.')
    if skipped:
        print('Skipped ' + str(skipped) + ' CWE id(s) that could not be retrieved or stored.')
    
# Banner, then report the wall-clock time at which the run begins.
print("*** MITRE's CWE master data collector v4 ***\n")
now = datetime.now()
current_time = format(now, "%H:%M:%S")
print("Start Time:", current_time)
startTime = datetime.now()  # reference point for the execution-time report

# Scrape everything and persist it.
write_data()

# Summary: where the results went and how long the run took.
print(''.join(['\nData collected.\nFile(s) saved at ', file_path, '/. and Database: ', db_name, ' created.']))
elapsed = datetime.now() - startTime
print("Execution time: " + str(elapsed))

*** MITRE's CWE master data collector v4 ***

Start Time: 15:52:58
Creating directory cwe_master to store feeds. Done.

Server status: (3.3.2) up and running!
Database cyvia_cwe_master does not exist, creating...
Database selected: cyvia_cwe_master

Inserting records, please wait...
Insertion complete.

Data collected.
File(s) saved at cwe_master/. and Database: cyvia_cwe_master created.
Execution time: 0:22:19.235767
