In [100]:
import os
import pandas as pd

def load_csvs_from_folders(root_folder):
    """Load every file one level below ``root_folder`` into a DataFrame.

    Each immediate subfolder of ``root_folder`` is scanned (not recursively)
    and every regular file inside it is handed to ``pandas.read_csv``.
    Files sitting directly in ``root_folder`` are ignored.

    Parameters
    ----------
    root_folder : str or path-like
        Directory whose immediate subfolders contain the files to load.

    Returns
    -------
    dict
        Maps ``"{subfolder}_df_{n}"`` to the loaded DataFrame, where ``n`` is
        the 1-based position of the file in the subfolder's *sorted* listing.
        A file that fails to load is reported on stdout and skipped, which
        leaves a gap in the numbering for that subfolder.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` if a directory cannot be listed
        (e.g. ``root_folder`` does not exist).
    """
    dataframes = {}

    # os.listdir returns entries in arbitrary order. Sorting makes the
    # generated keys (e.g. "Linux_df_1") refer to the same file on every run.
    subfolders = sorted(
        entry for entry in os.listdir(root_folder)
        if os.path.isdir(os.path.join(root_folder, entry))
    )

    for subfolder in subfolders:
        subfolder_path = os.path.join(root_folder, subfolder)

        file_names = sorted(
            entry for entry in os.listdir(subfolder_path)
            if os.path.isfile(os.path.join(subfolder_path, entry))
        )

        for x, file_name in enumerate(file_names, start=1):
            file_path = os.path.join(subfolder_path, file_name)
            try:
                dataframes[f'{subfolder}_df_{x}'] = pd.read_csv(file_path)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"Failed to load {file_name} in {subfolder}: {e}")

    return dataframes

root_folder = './ALLHOSTS - Copy/'  ### replace with your root folder path

# Load every file found in the immediate subfolders of root_folder.
dataframes = load_csvs_from_folders(root_folder)

# Sanity check: show the first row of one loaded frame.
# (f-strings interpolate with {...}; the former "${...}" printed a stray "$".)
print(f"\nLoaded DataFrames: {dataframes['Linux_df_1'].iloc[0]}")




Loaded DataFrames: $Plugin ID                                                               10114
CVE                                                             CVE-1999-0524
CVSS v2.0 Base Score                                                      2.1
Risk                                                                      Low
Host                                                             10.46.254.70
Protocol                                                                 icmp
Port                                                                        0
Name                            ICMP Timestamp Request Remote Date Disclosure
Synopsis                    It is possible to determine the exact time set...
Description                 The remote host answers to an ICMP timestamp r...
Solution                    Filter out the ICMP timestamp requests (13), a...
See Also                                                                  NaN
Plugin Output               The difference 

In [None]:
def _summarize_findings(df):
    """Return one summary record per distinct ``Name`` in ``df``.

    Each record carries the first ``Plugin Output``, ``Solution`` and ``Risk``
    seen for that name, plus a comma-separated ``Host/Port`` list of every row
    sharing the name.
    """
    records = []
    # sort=False keeps names in order of first appearance. groupby also skips
    # rows whose Name is NaN, which previously caused an IndexError because
    # `df['Name'] == nan` never matches any row.
    for name, rows in df.groupby('Name', sort=False):
        hosts_str = ", ".join(
            f"{host}/{port}" for host, port in zip(rows['Host'], rows['Port'])
        )
        records.append({
            'Name': name,
            'Plugin Output': rows['Plugin Output'].iloc[0],
            'Solution': rows['Solution'].iloc[0],
            'Risk': rows['Risk'].iloc[0],
            'Hosts': hosts_str
        })
    return records


def process_dataframes(dataframes):
    """Filter, summarize and print the findings held in ``dataframes``.

    Steps, in order:

    1. For every frame with a ``Risk`` column, print the distinct ``Risk``
       values (before filtering) and drop the rows whose ``Risk`` is missing.
       Frames without a ``Risk`` column are reported and left untouched.
    2. For every frame that has all of ``Name``, ``Plugin Output``,
       ``Solution``, ``Host``, ``Port`` and ``Risk``, build one record per
       distinct ``Name``. Other frames are reported and skipped.
    3. Print every record, separated by a line of dashes.

    Parameters
    ----------
    dataframes : dict
        Maps a frame name to a pandas DataFrame.

    Returns
    -------
    None
        Results are only printed.

    Notes
    -----
    Side effect: entries of ``dataframes`` that have a ``Risk`` column are
    replaced in the caller's dict by their filtered version. This is kept
    because code running after this call may rely on it.
    """
    required_columns = {'Name', 'Plugin Output', 'Solution', 'Host', 'Port', 'Risk'}
    results = []

    # Iterate over a snapshot so reassigning entries cannot disturb iteration.
    for df_name, df in list(dataframes.items()):
        if 'Risk' in df.columns:
            dataframes[df_name] = df[df['Risk'].notna()]
            print(df['Risk'].unique())
        else:
            print(f"'{df_name}' does not have a 'Risk' column, skipping...")

    for df_name, df in dataframes.items():
        if required_columns.issubset(df.columns):
            results.extend(_summarize_findings(df))
        else:
            print(f"'{df_name}' is missing required columns, skipping...")

    for result in results:
        print(f"Name: {result['Name']}")
        print(f"Plugin Output: {result['Plugin Output']}")
        print(f"Solution: {result['Solution']}")
        print(f"Risk: {result['Risk']}")
        print(f"Hosts: {result['Hosts']}")
        print("-" * 50)



# Filter out rows without a Risk, group findings by Name and print the summary
# for every frame in `dataframes`.
process_dataframes(dataframes)

['Low' nan 'Medium' 'High']
['Low' nan 'High' 'Medium']
['Low' nan 'High' 'Medium' 'Critical']
[nan 'Medium' 'High' 'Critical' 'Low']
['Low' nan 'Medium' 'High' 'Critical']
[nan 'Critical']
[nan 'Low' 'High' 'Medium']
[nan 'Medium']
[nan 'Critical' 'High' 'Medium' 'Low']
['Low' nan 'High' 'Critical' 'Medium']
['Low' nan 'Medium' 'High' 'Critical']
['Low' nan 'High' 'Medium' 'Critical']
['Low' nan]
Name: ICMP Timestamp Request Remote Date Disclosure
Plugin Output: The difference between the local and remote clocks is -29824 seconds.

Solution: Filter out the ICMP timestamp requests (13), and the outgoing ICMP
timestamp replies (14).
Risk: Low
Hosts: 10.46.254.70/0, 10.46.254.236/0
--------------------------------------------------
Name: SSH Weak Key Exchange Algorithms Enabled
Plugin Output: 
The following weak key exchange algorithms are enabled : 

  diffie-hellman-group-exchange-sha1

Solution: Contact the vendor or consult product documentation to disable the weak algorithms.
Risk: 