In [1]:
from db_utils import load_from_db
# Load the working frame through the project helper `load_from_db` (imported
# from `db_utils`); every later cell reads from this `df`.
# NOTE(review): the schema is not visible here — later cells expect columns such
# as 'operateur', 'arrondissement', 'type_clean', the 'mise_en_serv*' dates and
# `geom`; confirm against `db_utils`.
df = load_from_db()

### Calculation of 4G and 5G deployment delays (days) for fully valid rows

In [2]:
import pandas as pd

# Parse the three commissioning-date columns in place on `df`; values that
# cannot be parsed become NaT because of errors='coerce'.
date_cols = ['mise_en_serv', 'mise_en_serv_4g', 'mise_en_serv_5g_3500']
for date_col in date_cols:
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

# Keep only the rows where all three dates are present, as an independent copy.
df_valid = df.dropna(subset=date_cols).copy()

# Delay, in whole days, between 'mise_en_serv' and each technology's own date.
for delay_col, tech_col in (
    ('delai_4g', 'mise_en_serv_4g'),
    ('delai_5g', 'mise_en_serv_5g_3500'),
):
    df_valid.loc[:, delay_col] = (df_valid[tech_col] - df_valid['mise_en_serv']).dt.days

df_valid[['operateur','arrondissement','delai_4g','delai_5g']]


### Compute the average 5G deployment delay per arrondissement.

In [3]:
# Mean 5G delay for each arrondissement, computed on the fully valid rows.
mean_delay_5g = df_valid.groupby("arrondissement")['delai_5g'].agg("mean")

# Back to a two-column frame, ordered from the smallest mean delay upwards.
delai_par_arr = mean_delay_5g.reset_index().sort_values(by="delai_5g")

delai_par_arr


### Distribution of antennas per operator and arrondissement

In [5]:
# Count the rows of `df` for every (arrondissement, operateur) pair and expose
# the count as the 'nb_antennes' column.
group_keys = ['arrondissement', 'operateur']
rows_per_group = df.groupby(group_keys).size().rename('nb_antennes')
counts = rows_per_group.reset_index()

counts


### Leading operator in each arrondissement

In [6]:
# Row label of the largest 'nb_antennes' inside each arrondissement; on a tie
# idxmax returns the first label it encounters.
leader_idx = counts.groupby("arrondissement")['nb_antennes'].idxmax()

# Show the matching (arrondissement, operateur, nb_antennes) rows.
counts.loc[leader_idx]

In [9]:
import numpy as np

# Columns needed to build the per-technology flags below; `.copy()` yields an
# independent frame, so the columns added to `df_new` later in this cell do not
# appear on `df`.
cols = ['operateur', 'arrondissement', 'type_clean']
df_new = df[cols].copy()

def detect_4G(x):
    """Flag whether a technology label mentions 4G.

    Parameters
    ----------
    x : str or missing value
        A single label, as found in the ``type_clean`` column.

    Returns
    -------
    int or float
        ``1`` when the substring ``'4G'`` occurs in ``x``, ``0`` when it does
        not, and ``np.nan`` when the label is the ``'Unknown'`` sentinel or is
        not a string at all (``None``, float NaN, ...).
    """
    # A non-string label (None / NaN) would raise TypeError on the `in` test
    # below, so it is reported as "not known" exactly like 'Unknown'.
    if not isinstance(x, str) or x == 'Unknown':
        return np.nan
    return 1 if '4G' in x else 0

def detect_5G(x):
    """Flag whether a technology label mentions 5G.

    Parameters
    ----------
    x : str or missing value
        A single label, as found in the ``type_clean`` column.

    Returns
    -------
    int or float
        ``1`` when the substring ``'5G'`` occurs in ``x``, ``0`` when it does
        not, and ``np.nan`` when the label is the ``'Unknown'`` sentinel or is
        not a string at all (``None``, float NaN, ...).
    """
    # A non-string label (None / NaN) would raise TypeError on the `in` test
    # below, so it is reported as "not known" exactly like 'Unknown'.
    if not isinstance(x, str) or x == 'Unknown':
        return np.nan
    return 1 if '5G' in x else 0

# Derive one flag column per technology from the 'type_clean' label, applying
# the matching detector to every row.
for flag_col, detector in (('has_4G', detect_4G), ('has_5G', detect_5G)):
    df_new[flag_col] = df_new['type_clean'].map(detector)
df_new


In [10]:
# Named aggregations: output column -> (input column, reducer).
summary_spec = {
    'nb_4G': ('has_4G', 'sum'),          # sum of the 4G flags
    'nb_5G': ('has_5G', 'sum'),          # sum of the 5G flags
    'nb_total': ('type_clean', 'count'),  # non-null labels in the group
    'nb_unknown': ('type_clean', lambda labels: (labels == 'Unknown').sum()),
}

# One summary row per (arrondissement, operateur) pair.
repartition = (
    df_new
    .groupby(['arrondissement', 'operateur'])
    .agg(**summary_spec)
    .reset_index()
)
repartition


In [11]:
from sklearn.cluster import DBSCAN
import numpy as np

# Expose the x / y coordinates of `geom` as plain columns on `df`.
# NOTE(review): the column names suggest longitude / latitude in degrees —
# confirm the CRS of `geom`, since `eps` below is expressed in those units.
points = df.geom
df['lon'] = points.x
df['lat'] = points.y

# (n_rows, 2) coordinate matrix fed to the clustering step.
coords = np.column_stack((df['lon'], df['lat']))

# Density-based clustering; `model` stays available (fitted) for inspection.
model = DBSCAN(eps=0.002, min_samples=5)
df['cluster'] = model.fit_predict(coords)

df[['operateur','arrondissement','cluster']].head(10)
