# Social statistics

In [None]:
import pandas as pd

# Location of the training-set CSV, relative to the notebook's working directory.
X_train_path = "data/X_train_Hi5.csv"
# Load the whole file into a DataFrame; the cells below derive their subsets from it.
X_train = pd.read_csv(X_train_path)


In [None]:
# Extract the water withdrawals recorded in 2021.
X_train["date"] = pd.to_datetime(X_train["piezo_measurement_date"])
# .copy() makes X_2021 an independent frame: adding a column below would
# otherwise write into a slice of X_train and raise SettingWithCopyWarning.
X_2021 = X_train[X_train["date"].dt.year == 2021].copy()

# Marker size for the station map: the four withdrawal volumes are summed
# (a missing value in any of them yields NaN) and rescaled so that the
# reference volume below maps to a size of 500.
# NOTE(review): 123396566321 looks like a hard-coded maximum of the summed
# volumes (a later cell prints the max of this column) - confirm, or compute
# it from the data.
X_2021["total_withdraw"] = (
    (
        X_2021["prelev_other_volume_sum"]
        + X_2021["prelev_volume_0"]
        + X_2021["prelev_volume_1"]
        + X_2021["prelev_volume_2"]
    )
    / 123396566321
    * 500
)

# Reshape the three numbered withdrawal slots (suffixes _0, _1, _2) from wide
# to long: one row per (measurement row, slot) with un-suffixed column names.
prelev_fields = ["longitude", "latitude", "volume", "usage_label"]
prelev_frames = []
for prelev in range(3):
    renaming = {
        f"prelev_{field}_{prelev}": f"prelev_{field}" for field in prelev_fields
    }
    prelev_frames.append(X_2021[list(renaming)].rename(columns=renaming))

prelev_df = pd.concat(prelev_frames, axis=0)

prelev_df.shape

In [None]:
# Keep a single row for each distinct (longitude, latitude, volume, usage) record.
prelev_df = prelev_df.drop_duplicates()
deduplicated_shape = prelev_df.shape
largest_station_marker = X_2021["total_withdraw"].max()
print(deduplicated_shape)
# Largest marker size that will be used on the station map.
print(largest_station_marker)


In [None]:
# Prepare the columns used by the withdrawal scatter plots.
# Marker size: each volume relative to the largest one, scaled to 1_000.
largest_volume = prelev_df["prelev_volume"].max()
prelev_df["point_size"] = 1_000 * prelev_df["prelev_volume"] / largest_volume

# Category: one integer code per distinct usage label, numbered in the order
# the labels are returned by unique().
prelev_cat = list(prelev_df["prelev_usage_label"].unique())
print(prelev_cat)
prelev_cat_dic = dict(zip(prelev_cat, range(len(prelev_cat))))


def _usage_code(label):
    """Return the integer code of a usage label, or 6 if it is not in the mapping."""
    return prelev_cat_dic.get(label, 6)


prelev_df["cat"] = prelev_df["prelev_usage_label"].apply(_usage_code)
prelev_df

In [None]:
# Show the usage-label -> integer-code mapping; these codes are what the "cat" column holds.
prelev_cat_dic

In [None]:
# cartopy to display maps
%pip install cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt

In [None]:
# ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
def display_France_map(ax):
    """Draw the base map of France on ``ax``.

    Restricts the view to longitudes -5..10 and latitudes 42..52 (given in
    PlateCarree coordinates), then adds land, ocean, coastline, dotted
    borders, rivers and labelled gridlines.

    Args:
        ax: Axes to draw on; it must provide ``set_extent``, ``add_feature``
            and ``gridlines`` (e.g. an axes created with a cartopy projection).
    """
    ax.set_extent([-5, 10, 42, 52], crs=ccrs.PlateCarree())

    # Background layers, added in this order; only the borders get a custom style.
    background_layers = (
        (cfeature.LAND, {}),
        (cfeature.OCEAN, {}),
        (cfeature.COASTLINE, {}),
        (cfeature.BORDERS, {"linestyle": ":"}),
        (cfeature.RIVERS, {}),
    )
    for layer, layer_style in background_layers:
        ax.add_feature(layer, **layer_style)

    ax.gridlines(draw_labels=True)

In [None]:
fig = plt.figure(figsize=(20, 20), layout="constrained")

# Left: piezometric stations, marker size taken from the "total_withdraw" column.
ax_1 = fig.add_subplot(121, projection=ccrs.PlateCarree())
display_France_map(ax_1)
ax_1.set_title("Stations")
ax_1.scatter(
    data=X_2021,
    x="piezo_station_longitude",
    y="piezo_station_latitude",
    s="total_withdraw",
)

# Right: individual withdrawal points, sized by "point_size" and coloured by
# the integer usage code stored in "cat".
ax_2 = fig.add_subplot(122, projection=ccrs.PlateCarree())
display_France_map(ax_2)
ax_2.set_title("Main withdrawals of 2021")
scatter = ax_2.scatter(
    data=prelev_df,
    x="prelev_longitude",
    y="prelev_latitude",
    s="point_size",
    c="cat",
    alpha=0.7,
)
# Axes.legend() already attaches the legend to the axes, so it is not added a
# second time with add_artist(); the stray no-op `ax_2.legend` expression that
# followed has been removed as well. Legend entries are the integer usage codes.
legend2 = ax_2.legend(*scatter.legend_elements(), loc="best", title="Usage")
plt.show()

In [None]:
fig = plt.figure(figsize=(20, 20), layout="constrained")

# Left: stations with marker size proportional to the commune population,
# rescaled so that the largest commune gets a size of 500.
ax_1 = fig.add_subplot(121, projection=ccrs.PlateCarree())
X_2021_pop = X_2021["insee_pop_commune"] / X_2021["insee_pop_commune"].max() * 500
display_France_map(ax_1)
ax_1.set_title("Population centers")
ax_1.scatter(
    data=X_2021, x="piezo_station_longitude", y="piezo_station_latitude", s=X_2021_pop
)

# Right: only the withdrawal points whose usage label is "ENERGIE".
ax_2 = fig.add_subplot(122, projection=ccrs.PlateCarree())
display_France_map(ax_2)
ax_2.set_title("Main energy usage 2021")
scatter = ax_2.scatter(
    data=prelev_df[prelev_df["prelev_usage_label"] == "ENERGIE"],
    x="prelev_longitude",
    y="prelev_latitude",
    s="point_size",
    c="cat",
)
# Axes.legend() already attaches the legend to the axes, so it is not added a
# second time with add_artist(); the stray no-op `ax_2.legend` expression that
# followed has been removed as well. Legend entries are the integer usage codes.
legend2 = ax_2.legend(*scatter.legend_elements(), loc="best", title="Usage")
plt.show()

## Business case illustration

In [None]:
# Reload the training data. Department codes are read as text because they are
# identifiers, not numbers: the cells below compare them with strings such as
# "06", which would never match if pandas inferred an integer (or mixed) dtype.
X_train = pd.read_csv(
    "data/X_train_Hi5.csv",
    dtype={"piezo_station_department_code": str},
)

# One row per distinct (station id, latitude, longitude, department) combination,
# indexed by the station identifier.
stations_coords = (
    X_train[
        [
            "piezo_station_bss_id",
            "piezo_station_latitude",
            "piezo_station_longitude",
            "piezo_station_department_code",
        ]
    ]
    .drop_duplicates()
    .set_index("piezo_station_bss_id")
)
stations_coords.head()

In [None]:
# Bar plot: one row of values per scenario, one value per entry of `impact`.
impact = ["Pop impact", "€ impact", "Incendy risk"]
counts = [
    [40_000_000, 1000, 1_000_000],
    [20_000_000, 100_000, 10_500_000],
    # 100_000_000 was written "10_000_0000": same value, conventional grouping.
    [50_000_000, 100_000_000, 75_000_000],
]
bar_colors = ["tab:orange", "tab:blue", "tab:red"]

# Stations
import numpy as np

# Fixed seed so the illustrative danger levels are identical on every run.
# The three draws are made in this order to keep the seeded values stable.
rng = np.random.default_rng(1)
n_stations = len(stations_coords)
stations_coords["danger_00"] = rng.uniform(0, 2, n_stations)
stations_coords["danger_01"] = rng.uniform(0, 2, n_stations)
stations_coords["danger_02"] = rng.uniform(0, 4, n_stations)

# NOTE(review): the membership tests below compare against string codes, so
# this column is assumed to hold strings such as "06" - confirm its dtype.
dept_codes = stations_coords["piezo_station_department_code"]

# 00: +2 for the stations of the listed departments
depts_00 = ["88", "40", "34", "83"]
stations_coords["danger_00"] = stations_coords["danger_00"] + 2 * dept_codes.isin(
    depts_00
)

# 01: +2 for the stations of the listed departments
depts_01 = ["17", "33", "40", "47"]
stations_coords["danger_01"] = stations_coords["danger_01"] + 2 * dept_codes.isin(
    depts_01
)

# 02 (national): +1 for the stations of the listed departments.
# The original list repeated "83" and "34"; duplicates do not affect membership.
depts_02 = [
    "88",
    "40",
    "34",
    "83",
    "35",
    "53",
    "45",
    "18",
    "32",
    "06",
    "04",
    "11",
    "66",
]
# Fix: this scenario previously tested against depts_01, leaving depts_02 unused.
stations_coords["danger_02"] = stations_coords["danger_02"] + 1 * dept_codes.isin(
    depts_02
)


In [None]:
from matplotlib.ticker import FuncFormatter

# Figure that hosts the 2x3 grid of subplots added below.
fig = plt.figure(figsize=(21, 10), layout="constrained")


def format_millions(x, pos):
    """Format a tick value in millions with one decimal, e.g. 40_000_000 -> "40.0M".

    Args:
        x: Tick value.
        pos: Tick position; accepted for the two-argument tick-formatter
            signature but not used.

    Returns:
        The value divided by one million, with one decimal and an "M" suffix.
    """
    millions = x * 1e-6
    return f"{millions:1.1f}M"


formatter = FuncFormatter(format_millions)

# One figure column per scenario: (map title, danger column, bar-chart title).
scenarios = [
    ("Low national drought", "danger_00", "WaterRichter 4"),
    ("High regional drought", "danger_01", "WaterRichter 6"),
    ("High national drought", "danger_02", "WaterRichter 8"),
]

map_axes = []
bar_axes = []
for column, (map_title, danger_column, bar_title) in enumerate(scenarios):
    # Top row: stations coloured by the scenario's danger level. The base map
    # is drawn before the points in every panel (the copy-pasted stanzas this
    # loop replaces did it in a different order for each scenario).
    map_ax = fig.add_subplot(2, 3, column + 1, projection=ccrs.PlateCarree())
    map_ax.set_title(map_title)
    display_France_map(map_ax)
    map_ax.scatter(
        data=stations_coords,
        y="piezo_station_latitude",
        x="piezo_station_longitude",
        c=danger_column,
        cmap="Reds",
        vmax=5,  # shared colour ceiling so the three maps are comparable
    )
    map_axes.append(map_ax)

    # Bottom row: impact bars for the same scenario, on a shared y-range so
    # bar heights can be compared across scenarios.
    bar_ax = fig.add_subplot(2, 3, column + 4)
    bar_ax.bar(impact, counts[column], color=bar_colors)
    bar_ax.set_ylim(0, 101_000_000)
    bar_ax.set_title(bar_title, fontsize=20)
    bar_ax.yaxis.set_major_formatter(formatter)
    bar_axes.append(bar_ax)

# Keep the individual axes names bound, in case later cells refer to them.
ax_00, ax_01, ax_02 = map_axes
ax_10, ax_11, ax_12 = bar_axes

plt.show()