## Note that this parser was written to be compatible with Raw Excel/CSV Nessus output only.

In [13]:
import os
import pandas as pd
import openpyxl

def load_csvs_from_folders(root_folder):
    """Load every file found one level below ``root_folder`` as a CSV.

    Each immediate subfolder of ``root_folder`` is scanned (non-recursively)
    and every regular file in it is passed to ``pandas.read_csv``. Files that
    fail to load are reported on stdout and skipped.

    Subfolders and files are visited in sorted name order: ``os.listdir``
    returns entries in arbitrary order, so without sorting the generated keys
    could differ between runs or platforms.

    Args:
        root_folder: Path of the directory whose subfolders hold the exports.

    Returns:
        dict mapping ``"<subfolder>_df_<n>"`` to the loaded DataFrame, where
        ``n`` is the 1-based position of the file within its subfolder.
        Positions of files that failed to load are left unused.

    Raises:
        OSError: If ``root_folder`` or one of its subfolders cannot be listed.
    """
    dataframes = {}

    for subfolder in sorted(os.listdir(root_folder)):
        subfolder_path = os.path.join(root_folder, subfolder)
        if not os.path.isdir(subfolder_path):
            # Files sitting directly in the root folder are ignored.
            continue

        file_names = [
            name for name in sorted(os.listdir(subfolder_path))
            if os.path.isfile(os.path.join(subfolder_path, name))
        ]

        for index, file_name in enumerate(file_names, start=1):
            file_path = os.path.join(subfolder_path, file_name)
            try:
                dataframes[f'{subfolder}_df_{index}'] = pd.read_csv(file_path)
            except Exception as e:
                # Deliberately broad: loading is best-effort, one unreadable
                # file must not prevent the remaining files from loading.
                print(f"Failed to load {file_name} in {subfolder}: {e}")

    return dataframes

def ensure_output_folder():
    """Make sure the ``Output`` folder exists in the current working directory.

    Returns:
        The relative folder name ``'Output'``.

    Raises:
        FileExistsError: If ``Output`` exists but is not a directory.
    """
    output_folder = 'Output'
    # exist_ok=True replaces the separate exists() check, which left a window
    # in which another process could create the folder before makedirs().
    os.makedirs(output_folder, exist_ok=True)
    return output_folder

# Directory whose immediate subfolders contain the exported CSV files.
root_folder = './ALLHOSTS - Copy/'  ### replace with your root folder path

dataframes = load_csvs_from_folders(root_folder)

# Plain "{...}" placeholder: the previous "${...}" printed a literal "$"
# in front of the value. list() shows the names without the dict_keys wrapper.
print(f"\nLoaded DataFrames: {list(dataframes)}")


Loaded DataFrames: $dict_keys(['Linux_df_1', 'Linux_df_2', 'Linux_df_3', 'Linux_df_4', 'Linux_df_5', 'Linux_df_6', 'Misc_df_1', 'Misc_df_2', 'Misc_df_3', 'Misc_df_4', 'Windows_df_1', 'Windows_df_2', 'Windows_df_3'])


In [14]:
def _format_host(host, port):
    """Return ``host`` alone for port 0 or a missing port, else ``host/port``."""
    if pd.isna(port):
        # NOTE(review): a missing port is treated like port 0 (host only) -
        # confirm this matches how the report should list such rows.
        return str(host)
    if isinstance(port, float) and port.is_integer():
        # A column containing blanks is read as float; avoid "host/443.0".
        port = int(port)
    if port == 0:
        return str(host)
    return f"{host}/{port}"


def process_dataframes(dataframes, output_file):
    """Aggregate findings per plugin output and save them to an Excel file.

    For every DataFrame that has all required columns, rows with a missing
    ``Risk`` are discarded and the remaining rows are grouped by
    ``Plugin Output``. Each group becomes one result row holding the first
    row's Name/Solution/Risk/See Also/Description, the distinct CVEs joined
    with ``", "`` and the affected hosts (``host`` or ``host/port``).

    Result rows that agree on Plugin Output, Solution, Risk, See Also, CVE and
    Description are merged into one row. The first Name is kept and the host
    lists are concatenated, so hosts reported for the same finding in a
    different input file are not lost.

    Rows whose ``Plugin Output`` is missing cannot be matched by value, so
    they are grouped by ``Name`` instead.

    The passed ``dataframes`` dict is not modified.

    Args:
        dataframes: Mapping of a display name to a pandas DataFrame.
        output_file: File name (inside the output folder) of the workbook.

    Returns:
        None. Progress and skip messages are printed to stdout.
    """
    required_columns = {'Name', 'Plugin Output', 'Solution', 'Host', 'Port',
                        'Risk', 'See Also', 'CVE', 'Description'}
    dedup_columns = ['Plugin Output', 'Solution', 'Risk', 'See Also', 'CVE',
                     'Description']

    output_folder = ensure_output_folder()
    output_file = os.path.join(output_folder, output_file)

    # De-duplication key -> result record. Dicts keep insertion order, so the
    # output rows appear in order of first occurrence.
    merged = {}

    for df_name, df in dataframes.items():
        if 'Risk' not in df.columns:
            print(f"'{df_name}' does not have a 'Risk' column, skipping...")
            continue
        if not required_columns.issubset(df.columns):
            print(f"'{df_name}' is missing required columns, skipping...")
            continue

        # Filter into a local frame instead of writing back into the
        # caller's dict.
        rated = df[df['Risk'].notna()]

        # One pass with groupby instead of re-filtering the frame once per
        # unique plugin output.
        has_output = rated['Plugin Output'].notna()
        groups = [
            group for _, group
            in rated[has_output].groupby('Plugin Output', sort=False)
        ]
        # NaN never compares equal to itself, so rows without plugin output
        # need their own grouping key.
        groups.extend(
            group for _, group
            in rated[~has_output].groupby('Name', sort=False, dropna=False)
        )

        for group in groups:
            record = {
                'Name': group['Name'].iloc[0],
                'Plugin Output': group['Plugin Output'].iloc[0],
                'Solution': group['Solution'].iloc[0],
                'Risk': group['Risk'].iloc[0],
                'See Also': group['See Also'].iloc[0],
                # str() keeps join() working if the column was read as numbers.
                'CVE': ", ".join(str(cve) for cve in group['CVE'].dropna().unique()),
                'Description': group['Description'].iloc[0],
            }
            hosts = [
                _format_host(host, port)
                for host, port in zip(group['Host'], group['Port'])
            ]

            # Missing values are normalised to None so that two records that
            # are both missing a field produce the same key.
            key = tuple(
                None if pd.isna(record[column]) else record[column]
                for column in dedup_columns
            )
            if key in merged:
                merged[key]['Hosts'].extend(hosts)
            else:
                record['Hosts'] = hosts
                merged[key] = record

    if not merged:
        print("No results to save.")
        return

    results_df = pd.DataFrame([
        {**record, 'Hosts': ", ".join(record['Hosts'])}
        for record in merged.values()
    ])
    results_df.to_excel(output_file, index=False)
    print(f"\nResults saved to '{output_file}'")
        
# Step 1: aggregate the loaded DataFrames into 'output_results_unique.xlsx' inside the output folder.
process_dataframes(dataframes, 'output_results_unique.xlsx')


Results saved to 'Output\output_results_unique.xlsx'


In [15]:
def _sorted_unique_hosts(hosts):
    """Return a ``", "``-separated host cell de-duplicated and sorted."""
    if not isinstance(hosts, str):
        # Empty cells are read back as NaN; leave them untouched instead of
        # failing on .split() and aborting the whole step.
        return hosts
    return ', '.join(sorted(set(hosts.split(', '))))


def process_and_sort_excel(input_file, output_file):
    """Normalise the ``Hosts`` column and sort the workbook rows by ``Name``.

    Reads ``input_file`` from the output folder, rewrites every ``Hosts`` cell
    so that its comma-separated entries are unique and sorted, orders the rows
    by ``Name`` and writes the result to ``output_file`` in the same folder.

    Args:
        input_file: Workbook file name inside the output folder.
        output_file: File name of the sorted workbook inside the output folder.

    Returns:
        None. Status and error messages are printed to stdout; any exception
        raised while reading, processing or writing is reported, not raised.
    """
    output_folder = ensure_output_folder()
    input_file = os.path.join(output_folder, input_file)
    output_file = os.path.join(output_folder, output_file)

    try:
        df = pd.read_excel(input_file)

        if not {'Name', 'Hosts'}.issubset(df.columns):
            print(f"'{input_file}' does not have the required columns, unable to process.")
            return

        df['Hosts'] = df['Hosts'].map(_sorted_unique_hosts)
        sorted_df = df.sort_values(by='Name')

        # Both counts below come from the in-memory frame; the second one
        # only confirms that to_excel() returned without raising.
        print(f"Rows before saving to Excel: {len(sorted_df)}")
        sorted_df.to_excel(output_file, index=False)
        print(f"Rows after saving to Excel: {len(sorted_df)}")
        print(f"Sorted results saved to '{output_file}'")

    except Exception as e:
        # Top-level boundary of this step: report the problem and carry on.
        print(f"Error processing the file: {e}")

# Step 2: read 'output_results_unique.xlsx' from the output folder and write the sorted copy as 'sorted_results.xlsx'.
process_and_sort_excel('output_results_unique.xlsx', 'sorted_results.xlsx')

Rows before saving to Excel: 519
Rows after saving to Excel: 519
Sorted results saved to 'Output\sorted_results.xlsx'


In [16]:
def _join_unique(values, separator):
    """Join the distinct, non-missing ``values`` as strings in sorted order."""
    return separator.join(sorted({str(value) for value in values if not pd.isna(value)}))


def format_excel_data(input_file, output_file):
    """Write a plain-text report with one section per distinct ``Solution``.

    Reads ``input_file`` from the output folder and groups its rows by
    ``Solution``. For each group the distinct names, CVEs, descriptions and
    hosts are collected, sorted and written as one section to ``output_file``
    (UTF-8 text) in the same folder. Missing cell values are ignored when
    collecting; rows without a solution form their own section whose solution
    is shown as ``-``.

    Args:
        input_file: Workbook file name inside the output folder.
        output_file: File name of the text report inside the output folder.

    Returns:
        None. Status and error messages are printed to stdout; any exception
        raised while reading or writing is reported, not raised.
    """
    output_folder = ensure_output_folder()
    input_file = os.path.join(output_folder, input_file)
    output_file = os.path.join(output_folder, output_file)

    try:
        df = pd.read_excel(input_file)

        required_columns = ['Name', 'Plugin Output', 'Solution', 'Hosts', 'CVE', 'Description']
        if not all(col in df.columns for col in required_columns):
            print(f"'{input_file}' is missing one or more required columns.")
            return

        # dropna=False keeps findings without a solution in the report;
        # the default would silently leave them out.
        grouped = df.groupby('Solution', dropna=False)

        # Explicit UTF-8: the platform default encoding (e.g. cp1252 on
        # Windows) cannot represent every character found in descriptions.
        with open(output_file, 'w', encoding='utf-8') as f:
            for solution, group in grouped:
                names = _join_unique(group['Name'], ', ')
                cves = _join_unique(group['CVE'], ', ')
                descriptions = _join_unique(group['Description'], '\n')
                host_entries = (
                    host
                    for cell in group['Hosts'].dropna()
                    for host in str(cell).split(', ')
                )
                hosts = _join_unique(host_entries, ', ')
                solution_text = "-" if pd.isna(solution) else solution

                f.write(f"Name: {names}\n")
                f.write("Plugin Output: -\n")
                f.write(f"Solution: {solution_text}\n")
                f.write(f"CVE: {cves}\n")
                f.write(f"Description:\n{descriptions}\n")
                f.write(f"Affected Hosts: {hosts}\n\n")
                f.write("=" * 20 + "\n\n")

        print(f"Formatted results saved to '{output_file}'")

    except Exception as e:
        # Top-level boundary of this step: report the problem and carry on.
        print(f"Error processing the file: {e}")

# Step 3: read 'sorted_results.xlsx' from the output folder and write the text report 'formatted_output.txt'.
format_excel_data('sorted_results.xlsx', 'formatted_output.txt')

Formatted results saved to 'Output\formatted_output.txt'
