In [6]:
import os
import pandas as pd

def load_csv_dataframes(folder_path):
    """
    Load all CSV files found directly inside the specified folder into DataFrames.

    Files are visited in sorted name order so that the returned dictionary (and
    anything printed while iterating over it) has the same order on every run;
    os.listdir itself returns entries in arbitrary order. The '.csv' extension
    is matched case-insensitively, so 'TABLE.CSV' is loaded as well.

    A file that cannot be read or parsed is reported on stdout and skipped, so
    one bad file does not abort the whole load.

    Parameters:
    folder_path (str): Path to the folder containing the CSV files.

    Returns:
    dict: A dictionary with file names (without extensions) as keys and DataFrames as values.
          If two files differ only in the case of their extension, the one
          visited last replaces the earlier entry.

    Raises:
    OSError: If folder_path cannot be listed (for example, it does not exist).
    """
    dataframes = {}

    # Sorting makes the load order deterministic across platforms and runs.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith('.csv'):
            continue

        file_path = os.path.join(folder_path, file_name)
        try:
            df = pd.read_csv(file_path)
        except Exception as e:
            # Deliberate best-effort: report the failure and keep loading the rest.
            print(f"Error loading {file_name}: {e}")
            continue

        base_name = os.path.splitext(file_name)[0]
        dataframes[base_name] = df

    return dataframes

# Folder (relative to the notebook's working directory) holding the CSV files.
folder = "Datas"
dfs = load_csv_dataframes(folder)

# Report the (rows, columns) shape of every table that was loaded.
for name, df in dfs.items():
    print(f"Loaded {name}: {df.shape} rows and columns")

# Union of the column names that appear in at least one DataFrame.
all_columns = set()

for df_name, df in dfs.items():
    all_columns.update(df.columns)

# NOTE: despite its name, this list is the union of all columns, not the
# columns shared by every DataFrame. It is sorted because set iteration order
# is arbitrary, which would make the printed list differ from run to run.
common_columns_list = sorted(all_columns, key=str)

print("List of all column names across DataFrames:")
print(common_columns_list)
print(len(common_columns_list))


Loaded Table 3  Species by kingdom and class - show all: (74, 16) rows and columns
Loaded Table 4a Animal species (kingdom Animalia) by class and order - show all: (283, 16) rows and columns
Loaded Table 4b Plant species (kingdom Plantae) by class and family - show all: (520, 16) rows and columns
Loaded Table 4c Fungus species (kingdom Fungi) by class and order - show all: (50, 16) rows and columns
Loaded Table 4d Chromist species (kingdom Chromista) by class and order - show all: (7, 16) rows and columns
Loaded Table 5  Threatened species in each major group by country - show all: (250, 12) rows and columns
Loaded Table 6a Animal species (kingdom Animalia) by country - show all: (250, 16) rows and columns
Loaded Table 6b Plant species (kingdom Plantae) by country - show all: (250, 16) rows and columns
Loaded Table 6c Fungus species (kingdom Fungi) by country - show all: (250, 16) rows and columns
Loaded Table 6d Chromist species (kingdom Chromista) by country - show all: (250, 16) rows and columns

In [11]:
# DataFrames in which no column name contains the substring "DD"
dfs_without_dd = {
    table_name: table
    for table_name, table in dfs.items()
    # Keep the table only when every column name is free of "DD"
    if all('DD' not in col for col in table.columns)
}

# Show the names of the DataFrames that have no "DD" in their column names
print("DataFrames with no 'dd' in their column names:")
print(list(dfs_without_dd))


DataFrames with no 'dd' in their column names:
['Table 5  Threatened species in each major group by country - show all']


In [16]:
def clean_dd_column(dataframes, column='DD'):
    """
    Convert a text column such as "1,234" to floats in every DataFrame that has it.

    Commas are removed from the column's values and the result is cast to
    float. DataFrames without the column are left untouched. The DataFrames
    are modified in place; the same dictionary object is returned for
    convenience. If a column cannot be converted, the error is printed and
    that DataFrame's column is left as it was.

    Parameters:
    dataframes (dict): A dictionary of DataFrames.
    column (str): Name of the column to clean. Defaults to 'DD'.

    Returns:
    dict: The same dictionary, with the column cleaned where present.
    """
    for df_name, df in dataframes.items():
        if column not in df.columns:
            continue
        try:
            # Strip commas, then convert to a numeric dtype
            df[column] = df[column].replace(',', '', regex=True).astype(float)
        except Exception as e:
            # Best-effort: report and move on to the next DataFrame
            print(f"Error processing '{column}' column in {df_name}: {e}")
    return dataframes

# clean_dd_column converts the 'DD' column inside the existing DataFrames and
# returns the same dictionary, so `dfs` keeps pointing at the same objects.
dfs = clean_dd_column(dfs)

In [17]:
# Total of the 'DD' column for each DataFrame that has one.
# Note: this is the sum of the column's values, not a count of rows equal to 1.
dd_counts = {
    table_name: table['DD'].sum()
    for table_name, table in dfs.items()
    if 'DD' in table.columns  # skip tables without a 'DD' column
}

dd_counts
# # Print the results
# print("Sum of 'DD' for each DataFrame containing 'DD':")
# for name, count in dd_counts.items():
#     print(f"{name}: {count}")

{'Table 3  Species by kingdom and class - show all': np.float64(44080.0),
 'Table 4a Animal species (kingdom Animalia) by class and order - show all': np.float64(31734.0),
 'Table 4b Plant species (kingdom Plantae) by class and family - show all': np.float64(11996.0),
 'Table 4c Fungus species (kingdom Fungi) by class and order - show all': np.float64(326.0),
 'Table 4d Chromist species (kingdom Chromista) by class and order - show all': np.float64(24.0),
 'Table 6a Animal species (kingdom Animalia) by country - show all': np.float64(32799.0),
 'Table 6b Plant species (kingdom Plantae) by country - show all': np.float64(8464.0),
 'Table 6c Fungus species (kingdom Fungi) by country - show all': np.float64(190.0),
 'Table 6d Chromist species (kingdom Chromista) by country - show all': np.float64(12.0)}

## IUCN Red List Categories and Descriptions

The IUCN Red List categorizes species based on their risk of extinction. Below are the main categories:

- **EX - Extinct**: The species is no longer observed despite exhaustive surveys in its known habitat.
- **EW - Extinct in the Wild**: Survives only in captivity, in cultivation, or as a naturalized population well outside its historical range.
- **CR - Critically Endangered**: Faces an extremely high risk of extinction in the immediate future.
  - **CR(PE)**: Possibly Extinct – Likely extinct but requires confirmation.
  - **CR(PEW)**: Possibly Extinct in the Wild – Likely extinct in the wild but alive in captivity.
- **EN - Endangered**: Faces a very high risk of extinction in the near future.
- **VU - Vulnerable**: Faces a high risk of extinction in the medium term.
- **NT - Near Threatened**: Close to meeting the criteria for a threatened category.
- **LC - Least Concern**: Widespread and abundant, not at risk of extinction.
- **DD - Data Deficient**: Insufficient data to assess the risk of extinction.
- **LR/cd - Lower Risk/conservation dependent**: Not currently threatened but dependent on ongoing conservation efforts.

**Note**: CR(PE) and CR(PEW) are not separate IUCN categories; they are tags attached to Critically Endangered species to flag those that are possibly extinct (or possibly extinct in the wild).

Visit the [IUCN Red List website](https://www.iucnredlist.org) for more information.
