In [1]:
# CWE CSV Handler...

# Imports
import os, glob, requests, re, zipfile, timeit, time, json
import pandas as pd 
from time import time as timer
from os import listdir
from os.path import isfile, join
from datetime import datetime

# Shared settings: download directory, merged CSV/JSON file names and target database name
file_path = 'mitre_csv_files'
file_name = 'cwe_combined.csv'
db_name = 'cyvia_cwe_combined'
json_file = 'cwe_combined.json'

# One zipped CSV per CWE view on cwe.mitre.org
feed_urls = [
    'https://cwe.mitre.org/data/csv/' + view_id + '.csv.zip'
    for view_id in (
        '699',   # Software development
        '1194',  # Hardware design
        '1000',  # Research Concepts
    )
]

# Helper modules imported by bare module name; presumably local files that sit
# next to this notebook -- verify they are on the import path before running.
import functions as fn # functions class
# Shared helper instance; used below for check_files, extract_and_remove_zips and csv_to_json_to_db
func = fn.functions()
import Spinner as sp # spinner while performing db operations
# Started and stopped around the database insert in merge_csv_files
spinner = sp.Spinner()

# Download files...
def fetch_url_file(url):
    """Download ``url`` into the ``file_path`` directory.

    The file is saved under the last path component of the URL and is
    streamed to disk in 1 KiB chunks so the archive is never held fully
    in memory.

    Args:
        url: Address of the file to download.

    Returns:
        The path the file was written to.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    # Start obtaining new feeds...
    print('Obtaining '+url+'...')
    target = os.path.join(file_path, os.path.basename(url)) # path, file name from URL
    # A single streamed request: the previous code issued an extra, unused
    # requests.get(url) first, which downloaded every archive twice.
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail loudly rather than saving an HTML error page as a .zip file.
        response.raise_for_status()
        with open(target, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk: # skip keep-alive chunks
                    f.write(chunk)
    return target

# Merge CSV file data
# ToDo: Automate the process to get all files inside directory and merge them.
def merge_csv_files(file_path):
    """Merge the extracted CWE view CSVs into one CSV and load it into the database.

    Reads ``699.csv``, ``1000.csv`` and ``1194.csv`` from ``file_path``
    (relative to the current working directory), concatenates them, sorts by
    ``CWE-ID``, drops fully duplicated rows, replaces missing values with
    ``'-'``, prefixes every ID with ``CWE-`` and normalises the column names.
    The result is written to ``file_name`` inside the same directory and then
    handed to ``func.csv_to_json_to_db``.

    Args:
        file_path: Directory (relative to the working directory) that holds
            the extracted CSV files and receives the merged output.
    """
    directory = os.path.join(os.getcwd(), file_path) # portable; no hard-coded '\\'

    # Same read order as before: 699, 1000, 1194 (view meanings are listed next to feed_urls).
    view_files = ('699.csv', '1000.csv', '1194.csv')
    frames = [
        pd.read_csv(os.path.join(directory, view_file), engine='python',
                    encoding='ISO-8859-1', index_col=False)
        for view_file in view_files
    ]

    # ignore_index gives the merged frame unique row labels, which the
    # column assignment below relies on; labels are never written out.
    combined_list = pd.concat(frames, ignore_index=True)
    combined_list = combined_list.sort_values(by='CWE-ID')
    combined_list = combined_list.drop_duplicates()
    combined_list = combined_list.fillna('-') # Fill blank values with '-' instead of nan

    # update column type to str so the IDs can be prefixed: #### -> CWE-####
    combined_list = combined_list.astype(str)
    combined_list['CWE-ID'] = 'CWE-' + combined_list['CWE-ID']

    # change column names: CWE-ID becomes _id, all others lowercase spaced by underscore
    combined_list.columns = [
        '_id' if col == 'CWE-ID' else col.lower().replace(" ", "_")
        for col in combined_list.columns
    ]

    # Write CSV from dataframe
    csv_path = os.path.join(directory, file_name)
    json_path = os.path.join(directory, json_file)
    combined_list.to_csv(csv_path, index=False)

    # Write CSV to JSON to DB
    print('\nInserting records, please wait...')
    spinner.start()
    try:
        # NOTE(review): meaning of the trailing True flag is defined in the functions module -- confirm.
        func.csv_to_json_to_db(csv_path, json_path, db_name, True)
    finally:
        # Always stop the spinner, even if the insert fails.
        spinner.stop()
    print('Insertion complete.')

    
print('*** MITRE\'s CWE Handler v4 ***\n')
# time the execution: one timestamp serves both the banner and the elapsed-time report
startTime = datetime.now() # start timer
now = startTime
current_time = now.strftime("%H:%M:%S")
print("Start Time:", current_time)

# Step 1: Check and remove existing files and folders
func.check_files(file_path)

# Step 2: Download feed files
start = timer()
from multiprocessing.pool import ThreadPool
# Downloads are I/O-bound, so threads overlap the network waits. The with-block
# shuts the pool down once all results are consumed instead of leaking its workers.
with ThreadPool(8) as pool:
    for path in pool.imap_unordered(fetch_url_file, feed_urls):
        print('Done') # print(path)

# Step 3: extract and remove zip files
func.extract_and_remove_zips(file_path)

# Step 4: merge the CSV files and load them into the database
merge_csv_files(file_path)

print('\nData collected.\nFile(s) saved at '+file_path+'/. and Database: '+db_name+' created.')
print("Execution time: "+str(datetime.now() - startTime))

*** MITRE's CWE Handler v4 ***

Start Time: 23:05:31
Directory mitre_csv_files already exists.
Removing old files... Done.

Obtaining https://cwe.mitre.org/data/csv/699.csv.zip...Obtaining https://cwe.mitre.org/data/csv/1194.csv.zip...Obtaining https://cwe.mitre.org/data/csv/1000.csv.zip...


Done
Done
Done

Extracting files... Done.

Inserting records, please wait...
|Server status: (3.1.1) up and running!
-End of file.
Insertion complete.

Data collected.
File(s) saved at mitre_csv_files/. and Database: cyvia_cwe_combined created.
Execution time: 0:00:04.478619
