In [None]:
%pip install pandas

In [None]:
import pandas as pd
import os

raw_folder = '../../raw/emissioni'

# Roman numeral -> arabic digit used to normalise the tier labels.
# The entries are ordered so that no numeral is listed after one of its own
# substrings ('VII' before 'VI', 'IV' before 'V', 'III' before 'II', ...).
# This keeps first-match-wins substring lookups correct: with 'V' listed
# ahead of 'IV', a label such as 'Euro IV' would be matched by 'V' and
# wrongly turned into tier 5.
replacements = {
    'VII': '7',
    'VI': '6',
    'IV': '4',
    'V': '5',
    'III': '3',
    'II': '2',
    'I': '1',
}

# Define a function to aggregate the columns
def aggregate_columns(group):
    """Collapse one group of rows into a single summary row.

    Rules, applied per column in the group's column order:
      * 'Tier'          -> value of the first row
      * numeric dtype   -> mean of the column
      * string dtype    -> value of the last row
    Columns matching none of these rules are left out of the result.

    Returns a pandas Series indexed by the names of the kept columns.
    """
    is_numeric = pd.api.types.is_numeric_dtype
    is_text = pd.api.types.is_string_dtype

    summary = {}
    for name in group.columns:
        values = group[name]
        # The tier label is checked by name first, whatever its dtype
        if name == 'Tier':
            summary[name] = values.iloc[0]
            continue
        # Measurements are averaged across the group
        if is_numeric(values):
            summary[name] = values.mean()
            continue
        # For textual columns the last row wins
        if is_text(values):
            summary[name] = values.iloc[-1]
    return pd.Series(summary)

# Loop through all CSV files in the folder (sorted so the output order is reproducible)
for filename in sorted(os.listdir(raw_folder)):
    if not filename.endswith('.csv'):
        continue
    # Read CSV file. The '–' placeholder is parsed as a missing value so that
    # measurement columns stay numeric (and get averaged) instead of turning
    # into mixed text/number columns.
    df = pd.read_csv(os.path.join(raw_folder, filename), na_values=['–'])
    # Drop every column that contains the text 'Date' or 'Brake' or 'Smoke' or 'Test'
    df = df.drop(columns=df.columns[df.columns.str.contains('Date|Brake|Smoke|Test')])
    # Convert the column names called 'Standard' to 'Tier'
    df = df.rename(columns={'Standard': 'Tier'})
    # Replace missing values (NaN and the former '–' placeholders) with 0
    df = df.fillna(0)
    # Replace roman numbers with arabic numbers. Matching is by substring, so
    # longer numerals are tried first: otherwise 'Euro IV' would be caught by
    # 'V' and mislabelled as 'Euro 5'. na=False keeps the mask strictly boolean
    # when a tier cell is not a string (e.g. the 0 written by fillna above).
    for key in sorted(replacements, key=len, reverse=True):
        matches = df['Tier'].str.contains(key, regex=False, na=False)
        df.loc[matches, 'Tier'] = 'Euro ' + replacements[key]
    # Drop rows that contain 'Euro 7' in the 'Tier' column. na=True also drops
    # rows whose tier is not a string, since they cannot be assigned to a tier.
    df = df[~df['Tier'].str.contains('Euro 7', regex=False, na=True)]

    # Aggregate each tier into one row. Iterating over the groups (rather than
    # using groupby.apply) guarantees that every group still carries its 'Tier'
    # column, independent of how the installed pandas treats grouping columns
    # inside apply.
    df = pd.DataFrame(
        [aggregate_columns(group) for _, group in df.groupby('Tier')]
    ).reset_index(drop=True)

    print(df)