In [1]:
from db_utils import load_from_db
# Load the dataset through the project helper. Later cells index `df` by column
# name, group it, and read `df.geom.x` / `df.geom.y`.
# NOTE(review): presumably a (Geo)DataFrame with one row per antenna — confirm
# against db_utils.load_from_db.
df = load_from_db()

### Compute 4G and 5G deployment delays (in days) for rows where all three commissioning dates are valid

In [2]:
import pandas as pd

# Parse the three commissioning-date columns of `df` in place; values that
# cannot be parsed become NaT (errors='coerce').
date_cols = ['mise_en_serv', 'mise_en_serv_4g', 'mise_en_serv_5g_3500']
for date_col in date_cols:
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

# Work on an independent copy holding only rows where all three dates are present.
df_valid = df.dropna(subset=date_cols).copy()

# Delay = whole days elapsed between the base commissioning date and the
# 4G / 5G (3500) commissioning dates.
base_date = df_valid['mise_en_serv']
df_valid['delai_4g'] = (df_valid['mise_en_serv_4g'] - base_date).dt.days
df_valid['delai_5g'] = (df_valid['mise_en_serv_5g_3500'] - base_date).dt.days

df_valid[['operateur', 'arrondissement', 'delai_4g', 'delai_5g']]


Unnamed: 0,operateur,arrondissement,delai_4g,delai_5g
1,FREE MOBILE,16,0,5
4,FREE MOBILE,5,489,4387
5,ORANGE,15,867,4282
11,ORANGE,20,879,3703
15,FREE MOBILE,4,0,1647
...,...,...,...,...
2156,FREE MOBILE,16,0,4
2161,FREE MOBILE,15,0,67
2162,FREE MOBILE,20,273,3589
2167,FREE MOBILE,18,0,3034


### Compute the average 5G deployment delay per arrondissement.

In [3]:
# Mean 5G delay per arrondissement, computed on the rows kept in `df_valid`.
mean_delay_5g = df_valid.groupby("arrondissement")['delai_5g'].mean()

# Turn the grouped Series back into a two-column frame, shortest mean delay first.
delai_par_arr = mean_delay_5g.reset_index().sort_values(by="delai_5g")

delai_par_arr


Unnamed: 0,arrondissement,delai_5g
0,1,1032.416667
5,6,1776.941176
1,2,1783.857143
15,16,2021.676471
11,12,2074.04878
12,13,2117.931034
8,9,2194.642857
7,8,2206.125
17,18,2217.088889
2,3,2255.333333


### Distribution of antennas per operator and arrondissement

In [5]:
# Number of rows in `df` for every (arrondissement, operator) pair.
rows_per_pair = df.groupby(['arrondissement', 'operateur']).size()

# Flatten the two-level index into columns and name the count column.
counts = rows_per_pair.reset_index(name='nb_antennes')

counts


Unnamed: 0,arrondissement,operateur,nb_antennes
0,1,BOUYGUES,12
1,1,FREE MOBILE,13
2,1,ORANGE,8
3,1,SFR,12
4,2,BOUYGUES,11
...,...,...,...
75,19,SFR,30
76,20,BOUYGUES,47
77,20,FREE MOBILE,40
78,20,ORANGE,26


### Leading operator in each arrondissement

In [6]:
# Row label of the highest antenna count within each arrondissement.
# idxmax keeps only the first row on ties, so exactly one operator is shown
# per arrondissement.
leader_row_labels = counts.groupby("arrondissement")['nb_antennes'].idxmax()
counts.loc[leader_row_labels]

Unnamed: 0,arrondissement,operateur,nb_antennes
1,1,FREE MOBILE,13
7,2,SFR,12
8,3,BOUYGUES,8
13,4,FREE MOBILE,16
18,5,ORANGE,16
20,6,BOUYGUES,16
26,7,ORANGE,25
29,8,FREE MOBILE,32
32,9,BOUYGUES,17
36,10,BOUYGUES,27


In [9]:
import numpy as np

# Independent copy of the three columns used for the 4G/5G flagging below,
# so adding flag columns does not touch `df`.
cols = ['operateur', 'arrondissement', 'type_clean']
df_new = df.loc[:, cols].copy()

def detect_4G(x):
    """Flag whether a technology label mentions 4G.

    Parameters
    ----------
    x : object
        Technology label such as ``'3G/4G/5G'``, or the sentinel ``'Unknown'``.

    Returns
    -------
    int or float
        ``1`` if ``'4G'`` occurs in ``x``, ``0`` if it does not, and ``np.nan``
        when ``x`` is ``'Unknown'`` or is not a string (``None``, ``NaN``, ...).
    """
    # A non-string value (missing label) carries no information. The substring
    # test below would raise TypeError on it, so treat it like 'Unknown'.
    if not isinstance(x, str) or x == 'Unknown':
        return np.nan
    return 1 if '4G' in x else 0

def detect_5G(x):
    """Flag whether a technology label mentions 5G.

    Parameters
    ----------
    x : object
        Technology label such as ``'3G/4G/5G'``, or the sentinel ``'Unknown'``.

    Returns
    -------
    int or float
        ``1`` if ``'5G'`` occurs in ``x``, ``0`` if it does not, and ``np.nan``
        when ``x`` is ``'Unknown'`` or is not a string (``None``, ``NaN``, ...).
    """
    # A non-string value (missing label) carries no information. The substring
    # test below would raise TypeError on it, so treat it like 'Unknown'.
    if not isinstance(x, str) or x == 'Unknown':
        return np.nan
    return 1 if '5G' in x else 0

# Derive one flag column per generation from the technology label:
# 1 / 0 when the label is known, NaN when the detector returns NaN.
for flag_col, detector in (('has_4G', detect_4G), ('has_5G', detect_5G)):
    df_new[flag_col] = df_new['type_clean'].apply(detector)
df_new


Unnamed: 0,operateur,arrondissement,type_clean,has_4G,has_5G
0,FREE MOBILE,16,3G/4G/5G,1.0,1.0
1,FREE MOBILE,16,3G/4G/5G,1.0,1.0
2,BOUYGUES,17,Unknown,,
3,BOUYGUES,8,Unknown,,
4,FREE MOBILE,5,3G/4G/5G,1.0,1.0
...,...,...,...,...,...
2164,SFR,14,2G/3G/4G/5G,1.0,1.0
2165,BOUYGUES,6,Unknown,,
2166,BOUYGUES,4,Unknown,,
2167,FREE MOBILE,18,3G/4G/5G,1.0,1.0


In [10]:
# Output column -> (source column, aggregation), per (arrondissement, operator).
# The sums skip NaN flags, so a group whose labels are all 'Unknown' shows 0.0;
# read nb_4G / nb_5G together with nb_unknown.
aggregations = {
    'nb_4G': ('has_4G', 'sum'),
    'nb_5G': ('has_5G', 'sum'),
    'nb_total': ('type_clean', 'count'),
    'nb_unknown': ('type_clean', lambda labels: labels.eq('Unknown').sum()),
}

grouped_flags = df_new.groupby(['arrondissement', 'operateur'])
repartition = grouped_flags.agg(**aggregations).reset_index()
repartition


Unnamed: 0,arrondissement,operateur,nb_4G,nb_5G,nb_total,nb_unknown
0,1,BOUYGUES,0.0,0.0,12,12
1,1,FREE MOBILE,13.0,12.0,13,0
2,1,ORANGE,8.0,7.0,8,0
3,1,SFR,12.0,7.0,12,0
4,2,BOUYGUES,0.0,0.0,11,11
...,...,...,...,...,...,...
75,19,SFR,30.0,27.0,30,0
76,20,BOUYGUES,0.0,0.0,47,47
77,20,FREE MOBILE,40.0,40.0,40,0
78,20,ORANGE,26.0,20.0,26,0


In [11]:
from sklearn.cluster import DBSCAN
import numpy as np

# Expose the point coordinates of the geometry column as plain columns on `df`.
df['lon'], df['lat'] = df.geom.x, df.geom.y

# (n_rows, 2) array ordered as [lon, lat].
coords = df.loc[:, ['lon', 'lat']].to_numpy()

# Density-based clustering with DBSCAN's default (Euclidean) metric; eps is
# expressed in the same units as the coordinates.
# NOTE(review): if `geom` holds lon/lat in degrees, Euclidean distance treats a
# degree of longitude and a degree of latitude as equal lengths — confirm the
# CRS, or consider a projected CRS / haversine metric.
model = DBSCAN(eps=0.002, min_samples=5)
model.fit(coords)

# One label per row; the output below shows -1 for some rows, which is
# DBSCAN's label for noise points.
df['cluster'] = model.labels_

df.loc[:, ['operateur', 'arrondissement', 'cluster']].head(10)


Unnamed: 0,operateur,arrondissement,cluster
0,FREE MOBILE,16,0
1,FREE MOBILE,16,88
2,BOUYGUES,17,73
3,BOUYGUES,8,-1
4,FREE MOBILE,5,1
5,ORANGE,15,2
6,BOUYGUES,9,75
7,BOUYGUES,17,128
8,SFR,17,128
9,SFR,4,-1
