In [4]:
import pandas as pd
import unicodedata

In [5]:
# Column-name clean-up (lowercase, no accents)
def clean_column_names(df):
    """Normalize the column labels of ``df`` in place and return it.

    Every string label is stripped of surrounding whitespace, lower-cased,
    and has its combining accent marks removed (NFD decomposition followed
    by dropping characters of Unicode category ``Mn``). Labels that are not
    strings (for example integer labels) are left unchanged instead of
    raising ``AttributeError`` on ``.strip()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``columns`` attribute is overwritten.

    Returns
    -------
    pandas.DataFrame
        The very same object that was passed in (not a copy).
    """
    def strip_accents(s):
        # NFD splits an accented letter into base letter + combining mark;
        # the marks (category 'Mn') are then filtered out.
        return ''.join(c for c in unicodedata.normalize('NFD', s)
                       if unicodedata.category(c) != 'Mn')

    def normalize_label(col):
        # Only text labels can be stripped / lower-cased.
        if not isinstance(col, str):
            return col
        return strip_accents(col.strip().lower())

    df.columns = [normalize_label(col) for col in df.columns]
    return df

In [6]:
# Files to join: short dataset name -> CSV file name.
fichiers = dict([
    ('centres', 'centres_service.csv'),
    ('demandes', 'demandes_service_public.csv'),
    ('details', 'details_communes.csv'),
    ('dev', 'developpement.csv'),
    ('docs', 'documents_administratifs_ext.csv'),
    ('socio', 'donnees_socioeconomiques.csv'),
    ('logs', 'logs_activite.csv'),
    ('routes', 'reseau_routier_togo_ext.csv'),
])


In [7]:
# Paths: read every CSV listed in `fichiers` and store the frame, with
# cleaned column names, under its short dataset name.
dfs = {}
for name, path in fichiers.items():
    df = clean_column_names(pd.read_csv(path))
    dfs[name] = df

In [8]:
# One row per centre_id: mean of delai_effectif and sum of nombre_rejete.
df_logs_agg = (
    dfs['logs']
    .groupby('centre_id')
    .agg(
        delai_effectif=('delai_effectif', 'mean'),
        nombre_rejete=('nombre_rejete', 'sum'),
    )
    .reset_index()
)


In [9]:
# One row per prefecture_origine: sum of longueur_km and mean of
# temps_parcours_heures. The grouping key is renamed to 'prefecture'.
df_routes_agg = (
    dfs['routes']
    .groupby('prefecture_origine')
    .agg(
        longueur_km=('longueur_km', 'sum'),
        temps_parcours_heures=('temps_parcours_heures', 'mean'),
    )
    .reset_index()
    .rename(columns={'prefecture_origine': 'prefecture'})
)

In [10]:
# Start from an independent copy of the 'demandes' frame so that later
# reassignments of df_final do not touch dfs['demandes'].
df_final = dfs['demandes'].copy(deep=True)

# Column names used as the join key by the merges that use cles_geo.
cles_geo = ['region', 'prefecture', 'commune']

In [11]:
# Column merges: left-join four frames onto df_final using cles_geo as key.
# The first merge passes no `suffixes`, so pandas' default suffixes apply to
# overlapping column names; the following ones keep the left-hand name and
# tag the right-hand column with a dataset-specific suffix.
# NOTE(review): df_final is reassigned from itself, so re-running this cell
# merges the same frames a second time.
df_final = (
    df_final
    .merge(dfs['centres'], how='left', on=cles_geo)
    .merge(dfs['details'], how='left', on=cles_geo, suffixes=('', '_geo'))
    .merge(dfs['dev'], how='left', on=cles_geo, suffixes=('', '_dev'))
    .merge(dfs['socio'], how='left', on=cles_geo, suffixes=('', '_socio'))
)

In [12]:
# Link the documents frame: left-join on cles_geo plus 'type_document'.
# Overlapping columns from the right-hand frame get the '_mensuel' suffix.
cles_docs = cles_geo + ['type_document']
df_final = df_final.merge(
    dfs['docs'],
    how='left',
    on=cles_docs,
    suffixes=('', '_mensuel'),
)

In [13]:
# Merge logs and routes: left-join the aggregated logs on 'centre_id',
# then the aggregated routes on 'prefecture'.
df_final = (
    df_final
    .merge(df_logs_agg, how='left', on='centre_id')
    .merge(df_routes_agg, how='left', on='prefecture')
)

In [14]:
# Write the merged frame to CSV, leaving out the index column.
chemin_sortie = '../datasets/togo_dataset_final_propre.csv'
df_final.to_csv(chemin_sortie, index=False)