In [1]:
import geopandas as gpd
import pandas as pd

## Open GeoJSON

In [2]:
# Read data/bg.json with GeoPandas; its "name" column is the join key used later on.
bulgaria_gdf = gpd.read_file(filename="data/bg.json")

In [3]:
# Lower-case the names and relabel "grad sofiya" as "sofia-grad", the spelling the
# population tables are merged on. The substitution is limited to the "name"
# column so that no other column (geometry included) is scanned or altered.
bulgaria_gdf["name"] = (
    bulgaria_gdf["name"].str.lower().replace("grad sofiya", "sofia-grad")
)

## Open 2021 Population Data

In [4]:
# Read the semicolon-delimited 2021 report export exactly as delivered.
raw_population_2021_df = pd.read_csv(
    filepath_or_buffer="data/report_1750632621734.csv",
    sep=";",
)

In [5]:
# Clean the raw 2021 export in a single chain so that every step yields a fresh
# frame: no column is ever assigned on a filtered slice, which is what triggers
# pandas' SettingWithCopyWarning and makes the result depend on view/copy rules.
formatted_raw_population_2021_df = (
    raw_population_2021_df.rename(
        columns={"Unnamed: 0": "name", "Number": "POPULATION", "Number.1": "TURKISH"},
        # Fail right here if the export's headers ever change, instead of with a
        # less obvious KeyError further down.
        errors="raise",
    )
    .iloc[2:]  # skip the first two rows of the export
    .reset_index(drop=True)
    # Keep only rows whose label starts with a three-capital-letter code.
    .loc[lambda df: df["name"].str.match(r"^([A-Z]{3})\b", na=False)]
    .assign(
        # Drop the leading code token, then lower-case for the later joins on "name".
        name=lambda df: df["name"].str.split(" ").str[1:].str.join(" ").str.lower(),
        POPULATION=lambda df: df["POPULATION"].astype(int),
        # "-" is mapped to 0 before the integer cast.
        # NOTE(review): presumably "-" marks "no recorded count" in the export — confirm.
        TURKISH=lambda df: df["TURKISH"].replace("-", 0).astype(int),
    )
)

In [6]:
# Narrow the 2021 table to the join key and the two count columns.
formatted_raw_population_2021_df = formatted_raw_population_2021_df.loc[
    :, ["name", "POPULATION", "TURKISH"]
]

## Open 2011 Population Data

In [7]:
# Read the "Ethnic group and districts" sheet of the 2011 census workbook unmodified.
raw_population_2011_df = pd.read_excel(
    io="data/population-2011censusdata.xls",
    sheet_name="Ethnic group and districts",
)

In [8]:
# Clean the raw 2011 sheet in a single chain. Every step returns a new frame, so
# nothing is assigned on the `.iloc[4:]` slice (the pattern that raises
# SettingWithCopyWarning in the cell-by-cell version).
formatted_raw_population_2011_df = (
    raw_population_2011_df.rename(
        columns={
            # The trailing space is part of the header text matched here.
            "POPULATION BY ETHNIC GROUP AND DISTRICTS AS OF 01.02.2011 ": "name",
            "Unnamed: 1": "POPULATION",
            "Unnamed: 3": "TURKISH",
        },
        # Surface a changed/missing header immediately rather than via a later KeyError.
        errors="raise",
    )
    .iloc[4:]  # skip the first four rows of the sheet
    .astype({"POPULATION": int, "TURKISH": int})
    # Lower-case the labels so they can be matched against the other tables.
    .assign(name=lambda df: df["name"].str.lower())
)

In [9]:
# Keep the join key plus the two count columns, discard the first remaining row,
# and renumber the index from zero.
formatted_raw_population_2011_df = (
    formatted_raw_population_2011_df.loc[:, ["name", "POPULATION", "TURKISH"]]
    .iloc[1:]
    .reset_index(drop=True)
)

## Merge 2011 and 2021 Population Data

In [10]:
# Relabel "sofia cap" as "sofia-grad" so the 2011 row matches the key used by the
# other tables. Only the "name" column is touched; the count columns are left
# out of the replacement entirely.
formatted_raw_population_2011_df = formatted_raw_population_2011_df.assign(
    name=lambda df: df["name"].replace("sofia cap", "sofia-grad")
)

In [11]:
# Left join on "name": every 2011 row is kept, and shared column names receive
# the _2011 / _2021 suffixes.
population_change_df = pd.merge(
    formatted_raw_population_2011_df,
    formatted_raw_population_2021_df,
    how="left",
    on="name",
    suffixes=("_2011", "_2021"),
)

In [13]:
# Add, for both the Turkish and the total counts, the absolute 2011 -> 2021 change
# and that change as a percentage of the 2011 value, rounded to two decimals.
# Each PERCENT_* column is computed from the DIFF_* column defined just before it.
population_change_df = population_change_df.assign(
    DIFF_TURKISH=lambda df: df["TURKISH_2021"] - df["TURKISH_2011"],
    PERCENT_DIFF_TURKISH=lambda df: (
        df["DIFF_TURKISH"] / df["TURKISH_2011"] * 100
    ).round(decimals=2),
    DIFF_TOTAL=lambda df: df["POPULATION_2021"] - df["POPULATION_2011"],
    PERCENT_DIFF_TOTAL=lambda df: (
        df["DIFF_TOTAL"] / df["POPULATION_2011"] * 100
    ).round(decimals=2),
)

## Merge with Geometry and Export

In [17]:
# Right join on "name": every row of population_change_df is kept; a name that is
# absent from bulgaria_gdf therefore ends up with no geometry.
population_gdf = bulgaria_gdf.merge(
    population_change_df,
    how="right",
    on="name",
)

In [18]:
# Reproject to EPSG:7803, then write the result to disk.
# NOTE(review): confirm EPSG:7803 is the intended target CRS for this export.
population_gdf = population_gdf.to_crs(epsg=7803)
population_gdf.to_file("data/bulgaria_population.gpkg")