# 03 Census data merging

This notebook merges preprocessed and cleaned census data from the Czech Statistical Office (CSU) with the polygons of the corresponding spatial units.

In [None]:
from glob import glob
from pathlib import Path

import geopandas as gpd
import geoplanar
import pandas as pd
from libpysal.cg import voronoi_frames

In [None]:
# Load the polygon layer from the joined shapefile and preview two rows
polygons = gpd.read_file(
    filename="d_zsj_010122_join.shp",
    driver="ESRI Shapefile",
)
polygons.head(n=2)

In [None]:
# Reduce every geometry to two dimensions before merging, then preview
polygons["geometry"] = polygons["geometry"].force_2d()
polygons.head(n=2)

In [None]:
# Load the workbook that translates kod_zsj_d codes into kod_nadzsj_d codes
converter = pd.read_excel(
    io="/data/uscuni-restricted/Data_SLDB_2021/zsj_data/ZSJD_slouceneZSJD/sloucene/_prevodnik_zsjd_nadzsjd.xlsx"
)

In [None]:
# Use the row at position 2 as the header, then keep only the rows from
# position 4 onwards and the columns from position 18 onwards.
converter = converter.set_axis(converter.iloc[2], axis=1).iloc[4:, 18:]
converter.head()

In [None]:
# Lookup series: index holds kod_zsj_d, values hold the matching kod_nadzsj_d
mapping = converter.set_index(keys="kod_zsj_d").loc[:, "kod_nadzsj_d"]

In [None]:
# Preview the first five entries of the lookup series
mapping.head(n=5)

In [None]:
# Attach the kod_nadzsj_d code to every polygon by looking up its KOD_ZSJ_D;
# polygons whose code is absent from the lookup receive a missing value.
polygons = polygons.assign(kod_nadzsj_d=polygons["KOD_ZSJ_D"].map(mapping))

In [None]:
# Number of polygons that did not receive a kod_nadzsj_d from the lookup
polygons["kod_nadzsj_d"].isnull().sum()

In [None]:
# Manually set kod_nadzsj_d for the row labelled 158.
# NOTE(review): presumably this is the row left unmatched by the lookup above;
# the label is hard-coded, so confirm it still identifies the intended polygon
# whenever the input shapefile or its row order changes.
polygons.loc[158, "kod_nadzsj_d"] = "53806003"

In [None]:
# Substitute 0 for None entries throughout the table
polygons = polygons.replace(to_replace={None: 0})

In [None]:
# Cast the two country columns to float in a single call
polygons = polygons.astype({"Bulgaria": float, "USA": float})

In [None]:
# Merge all polygons sharing a kod_nadzsj_d into one geometry. The grouping
# column becomes the index, and each remaining attribute keeps the first value
# found in its group (the default aggregation, spelled out here).
nadzjs_polygons = polygons.dissolve(by="kod_nadzsj_d", aggfunc="first")

In [None]:
# Display the dissolved polygons for visual inspection
nadzjs_polygons

## Link to data files

In [None]:
# Remove the attribute columns that are not carried into the output.
# NOTE(review): "KOD_KRAJ" is dropped here, yet the following rename cell maps
# it to "kod_kraj" — confirm whether the column was meant to be kept.
nadzjs_polygons = nadzjs_polygons.drop(
    [
        "OBJECTID",
        "KOD_ZSJ_D",
        "NAZ_ZSJ_D",
        "KOD_ZSJ",
        "NAZ_ZSJ",
        "KOD_CAST_D",
        "NAZ_CAST_D",
        "KOD_UTJ",
        "NAZ_UTJ",
        "KOD_KU",
        "KOD_KRAJ",
        "NAZ_KU",
        "KOD_ZUJ",
        "NAZ_ZUJ",
        "LAU1",
        "PLATIOD",
        "NEPLATIPO",
        "Shape_Leng",
        "Shape_Area",
        "OBJECTID_1",
        "kod_d_zsj_",
        "kod_d_zsj1",
    ],
    axis="columns",
)

In [None]:
# Give the remaining administrative columns their output names.
# NOTE(review): "KOD_KRAJ" is removed by the preceding drop cell, so its entry
# below currently has no effect — confirm which of the two cells is intended.
nadzjs_polygons = nadzjs_polygons.rename(
    {
        "NUTS3_KRAJ": "NUTS_3",
        "KOD_KRAJ": "kod_kraj",
        "NAZ_KRAJ": "naz_kraj",
        "NAZ_OKRES": "naz_okres",
        "KOD_OKRES": "kod_okres",
        "NAZ_OBEC": "naz_obec",
        "KOD_OBEC": "kod_obec",
    },
    axis="columns",
)

In [None]:
# Write the dissolved polygons to GeoParquet.
# After the dissolve, kod_nadzsj_d lives in the index; writing with index=False
# alone would silently discard it and leave the output without any unit
# identifier. Move it back into a regular column first so it is persisted.
nadzjs_polygons.reset_index().to_parquet(
    "/data/uscuni-restricted/04_spatial_census/nadzsjd_foreigners.parquet", index=False
)