In [69]:
import pandas as pd
import csv

### Global Variables

In [71]:
# File paths
Path_Apl_France_Medecins = "APL_France_Medecins.csv"
Path_Distance_Medecins_Communes = "communes_distance_et_evolution_5_ans.csv"

### Création des DataFrames

In [76]:
APL_France_Medecins_df = pd.read_csv(Path_Apl_France_Medecins, dtype='str',encoding='UTF-8', sep=",")

In [77]:
Distance_Medecins_Communes_df = pd.read_csv(Path_Distance_Medecins_Communes, dtype='str',encoding='UTF-8', sep=";")

In [78]:
APL_France_Medecins_df.head()

Unnamed: 0,Code commune INSEE,Communes,APL aux médecins généralistes,APL aux médecins généralistes de moins de 65 ans,Population standardisée 2014 pour la médecine générale,Nom d'exercice
0,1001,L' Abergement-Clémenciat,2.305,2.021,765.2604758149912,[nan]
1,1002,L' Abergement-de-Varey,2.773,2.683,235.4996212788099,[nan]
2,1004,Ambérieu-en-Bugey,4.428,4.325,13638.75838414545,"['ERRARD', 'CELLIER', 'BILLANDON', 'BELEY', 'G..."
3,1005,Ambérieux-en-Dombes,4.442,4.442,1571.2112473180186,"['PIGNARD', 'DURAFFOURG']"
4,1006,Ambléon,1.084,0.936,119.10786904950405,[nan]


In [79]:
Distance_Medecins_Communes_df.head()

Unnamed: 0,Code,Nom Commune,Distance au plus proche généraliste 2017,Evolution du nbre de géné sur 5 ans 2017
0,1001,L'Abergement-Clémenciat,5,0
1,1002,L'Abergement-de-Varey,4,0
2,1004,Ambérieu-en-Bugey,0,-1
3,1005,Ambérieux-en-Dombes,0,0
4,1006,Ambléon,11,0


### Jointures des tables

In [80]:
# Renommage des colonnes (facultatif)

Distance_Medecins_Communes_df = Distance_Medecins_Communes_df.rename(columns={ Distance_Medecins_Communes_df.columns[0]: "Code INSEE" })
APL_France_Medecins_df = APL_France_Medecins_df.rename(columns={ APL_France_Medecins_df.columns[0]: "Code INSEE" })

In [99]:
result_df = pd.merge(APL_France_Medecins_df,Distance_Medecins_Communes_df[['Distance au plus proche généraliste 2017','Evolution du nbre de géné sur 5 ans 2017', 'Code INSEE']], how = 'left', on = 'Code INSEE')

In [205]:
result_df.head(5)

Unnamed: 0,Code INSEE,Communes,APL aux médecins généralistes,APL aux médecins généralistes de moins de 65 ans,Population standardisée 2014 pour la médecine générale,Nom d'exercice,Distance au plus proche généraliste 2017,Evolution du nbre de géné sur 5 ans 2017,inverse APL,APL 65 ans Standardisé,Indice pop,Score,Indice evolution,Score_2
0,1001,L' Abergement-Clémenciat,2.305,2.021,765,[nan],5,0.0,0.494805,2.021,0.5,0.994805,0.0,0.994805
1,1002,L' Abergement-de-Varey,2.773,2.683,235,[nan],4,0.0,0.372717,2.683,0.0,0.372717,0.0,0.372717
2,1004,Ambérieu-en-Bugey,4.428,4.325,13639,"['ERRARD', 'CELLIER', 'BILLANDON', 'BELEY', 'G...",0,-1.0,0.231214,4.325,0.0,0.431214,0.2,0.431214
3,1005,Ambérieux-en-Dombes,4.442,4.442,1571,"['PIGNARD', 'DURAFFOURG']",0,0.0,0.225124,4.442,0.5,0.725124,0.0,0.725124
4,1006,Ambléon,1.084,0.936,119,[nan],11,0.0,1.0,1.0,0.0,1.0,0.0,1.0


### Nettoyage du fichier

In [101]:
# On passe la variable de la population en format numérique
result_df['Population standardisée 2014 pour la médecine générale'] = result_df['Population standardisée 2014 pour la médecine générale'].apply(pd.to_numeric, errors='coerce')

In [104]:
# On arrondit la variable à l'unité près, et sans le "0." ensuite.
result_df['Population standardisée 2014 pour la médecine générale'] = result_df['Population standardisée 2014 pour la médecine générale'].apply(lambda x: round(x))

### Stastistiques basiques

In [115]:
# Statistiques de l'évolution des médecins
result_df['Evolution du nbre de géné sur 5 ans 2017'].value_counts()

0      26315
-1      1738
1       1383
-2       433
2        323
-3       159
3        117
-4        77
4         39
-5        34
-6        31
-7        24
5         21
-9        18
7         12
-8         9
-14        7
-12        7
-11        5
-10        5
6          5
-15        3
-16        3
-17        3
12         2
8          2
-23        2
9          2
11         1
16         1
-20        1
22         1
-31        1
38         1
32         1
-30        1
-13        1
66         1
-21        1
-39        1
-67        1
-19        1
10         1
-18        1
Name: Evolution du nbre de géné sur 5 ans 2017, dtype: int64

In [None]:
# Afficher les communes où l'APL = 0
result_df.loc[result_df['APL aux médecins généralistes de moins de 65 ans'] == 0]

In [None]:
# statistiques de base sur l'APL
result_df['APL aux médecins généralistes de moins de 65 ans'].describe()

### Création de nouvelles colonnes

#### APL Standardisé

On modifie toutes les valeurs inférieures ou égal à 1 pour qu'elles soient égales à 1. Quant aux valeurs supérieures ou égales à 5 on les fixe à 5.

In [151]:
def standardisation(x):
    if x <= 1:
        return 1
    elif x >= 5:
        return 5
    else:
        return x

In [152]:
result_df['APL 65 ans Standardisé'] = result_df['APL aux médecins généralistes de moins de 65 ans'].apply(lambda x: standardisation(x))

#### Inverse de l'APL

In [116]:
# On passe la variable de la population en format numérique
result_df['APL aux médecins généralistes de moins de 65 ans'] = result_df['APL aux médecins généralistes de moins de 65 ans'].apply(pd.to_numeric, errors='coerce')

In [137]:
def inverse(x):
    if x != 0:
        return 1/x
    else:
        return x
    

In [155]:
result_df['inverse APL'] = result_df['APL 65 ans Standardisé'].apply(lambda x: inverse(x))

#### Population

Si la population est comprise entre 400 et 3000, Indice pop = 0.5.

In [161]:
def indice_pop(x):
    if 400 <= x <= 3000:
        return 0.5
    else:
        return 0

In [162]:
result_df['Indice pop'] = result_df['Population standardisée 2014 pour la médecine générale'].apply(lambda x: indice_pop(x))

In [165]:
result_df['Indice pop'].value_counts()

0.0    20271
0.5    15633
Name: Indice pop, dtype: int64

#### Evolution

Si l'évolution du niombre de médecins est négative, on lui assigne 0.2.

In [179]:
def evolution(x):
    if x < 0:
        return 0.2
    else:
        return 0

In [181]:
# On passe la variable en format numérique
result_df['Evolution du nbre de géné sur 5 ans 2017']= result_df['Evolution du nbre de géné sur 5 ans 2017'].apply(pd.to_numeric, errors='coerce')

In [182]:
result_df['Indice evolution'] = result_df['Evolution du nbre de géné sur 5 ans 2017'].apply(lambda x: evolution(x))

### Calcul du score

In [206]:
#colonnes utilisées pour calculer le score
colonnes_pour_score_liste = ['inverse APL','Indice pop','Indice evolution']

In [200]:
result_df['Score_2'] = pd.concat(liste_colonnes, axis=1).sum(axis=1)

In [204]:
result_df['Score_2'] = result_df[liste_colonnes].sum(axis=1)

In [201]:

result_df.loc[result_df['Score'] > 1]

Unnamed: 0,Code INSEE,Communes,APL aux médecins généralistes,APL aux médecins généralistes de moins de 65 ans,Population standardisée 2014 pour la médecine générale,Nom d'exercice,Distance au plus proche généraliste 2017,Evolution du nbre de géné sur 5 ans 2017,inverse APL,APL 65 ans Standardisé,Indice pop,Score,Indice evolution,Score_2
15,01017,Argis,1.592,1.592,449,[nan],3,0.0,0.628141,1.592,0.5,1.128141,0.0,1.128141
40,01044,Billiat,1.72,1.610,563,[nan],7,0.0,0.621118,1.610,0.5,1.121118,0.0,1.121118
49,01054,Bourg-Saint-Christophe,1.948,1.838,1160,[nan],4,0.0,0.544070,1.838,0.5,1.044070,0.0,1.044070
52,01058,Brégnier-Cordon,1.623,1.434,835,['ROBERT'],0,0.0,0.697350,1.434,0.5,1.197350,0.0,1.197350
54,01060,Brénod,1.999,1.069,587,['KACZMARCZYK'],0,0.0,0.935454,1.069,0.5,1.435454,0.0,1.435454
56,01062,Bressolles,2.062,1.914,784,[nan],4,0.0,0.522466,1.914,0.5,1.022466,0.0,1.022466
62,01068,Cerdon,2.144,1.754,808,[nan],5,0.0,0.570125,1.754,0.5,1.070125,0.0,1.070125
73,01080,Champdor-Corcelles,2.371,1.339,662,[nan],5,0.0,0.746826,1.339,0.5,1.246826,0.0,1.246826
74,01081,Champfromier,1.442,1.387,708,['GROSREY'],0,0.0,0.720981,1.387,0.5,1.220981,0.0,1.220981
75,01082,Chanay,0.649,0.649,583,[nan],9,0.0,1.000000,1.000,0.5,1.500000,0.0,1.500000


In [198]:
result_df.head(25)

Unnamed: 0,Code INSEE,Communes,APL aux médecins généralistes,APL aux médecins généralistes de moins de 65 ans,Population standardisée 2014 pour la médecine générale,Nom d'exercice,Distance au plus proche généraliste 2017,Evolution du nbre de géné sur 5 ans 2017,inverse APL,APL 65 ans Standardisé,Indice pop,Score,Indice evolution,Score_2
0,1001,L' Abergement-Clémenciat,2.305,2.021,765,[nan],5,0.0,0.494805,2.021,0.5,0.994805,0.0,0.994805
1,1002,L' Abergement-de-Varey,2.773,2.683,235,[nan],4,0.0,0.372717,2.683,0.0,0.372717,0.0,0.372717
2,1004,Ambérieu-en-Bugey,4.428,4.325,13639,"['ERRARD', 'CELLIER', 'BILLANDON', 'BELEY', 'G...",0,-1.0,0.231214,4.325,0.0,0.431214,0.2,0.231214
3,1005,Ambérieux-en-Dombes,4.442,4.442,1571,"['PIGNARD', 'DURAFFOURG']",0,0.0,0.225124,4.442,0.5,0.725124,0.0,0.725124
4,1006,Ambléon,1.084,0.936,119,[nan],11,0.0,1.0,1.0,0.0,1.0,0.0,1.0
5,1007,Ambronay,4.837,4.672,2474,"['TANGUY', 'BLANCHON', 'BARD']",0,-1.0,0.214041,4.672,0.5,0.914041,0.2,0.714041
6,1008,Ambutrix,4.313,3.995,685,[nan],2,0.0,0.250313,3.995,0.5,0.750313,0.0,0.750313
7,1009,Andert-et-Condon,4.451,4.009,351,[nan],7,0.0,0.249439,4.009,0.0,0.249439,0.0,0.249439
8,1010,Anglefort,2.133,2.01,1089,[nan],6,0.0,0.497512,2.01,0.5,0.997512,0.0,0.997512
9,1011,Apremont,1.817,1.587,371,[nan],7,0.0,0.63012,1.587,0.0,0.63012,0.0,0.63012


### Export du fichier en CSV

In [188]:
result_df.to_csv('Communes_APL_et_Distances.csv', sep=',', encoding='utf-8', quotechar='"', index=False, quoting=csv.QUOTE_ALL)